This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 6b5cef3ea51 [opt](nereids) range inference convert isnull to EmptyValue (#45310) 6b5cef3ea51 is described below commit 6b5cef3ea5105f2ff85bb7bb0b711b305dfd54b2 Author: yujun <yu...@selectdb.com> AuthorDate: Mon Dec 16 18:29:17 2024 +0800 [opt](nereids) range inference convert isnull to EmptyValue (#45310) Problem Summary: for sql a > 10 and a < 20 or a > 30 and a < 40 or a > 60 and a < 50 AddMinMax should opt it as: (a < 20 or a > 30 or a is null and null) and a > 10 and a < 40 But it fail, because the SimplifyRange will opt it as a > 10 and a < 20 or a > 30 and a < 40 or a is null and null since the 3rd is `a is null and null`, it couldn't get a's range and suppose a's range is [-00, +00], then the whole expression, it will get a's range is [-00, +00], to fix this problem, RangeInference need to convert `a is null and null` to EmptyValue instead of UnknownValue. --- .../nereids/rules/expression/rules/AddMinMax.java | 12 ++++++- .../rules/expression/rules/RangeInference.java | 26 +++++++++++++- .../rules/expression/rules/SimplifyRange.java | 30 ++++++---------- .../rules/expression/ExpressionRewriteTest.java | 42 +++++++++++++++++++++- 4 files changed, 88 insertions(+), 22 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/AddMinMax.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/AddMinMax.java index 639616244ec..c1efb82bac6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/AddMinMax.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/AddMinMax.java @@ -326,7 +326,17 @@ public class AddMinMax implements ExpressionPatternRuleFactory { for (int i = 1; i < sourceValues.size(); i++) { // process in sourceValues[i] Map<Expression, MinMaxValue> minMaxValues = getExprMinMaxValues(sourceValues.get(i)); - for (Map.Entry<Expression, MinMaxValue> entry : minMaxValues.entrySet()) { + // merge values of sourceValues[i] into result. + // also keep the value's relative order in sourceValues[i]. + // for example, if a and b in sourceValues[i], but not in result, then during merging, + // a and b will assign a new exprOrderIndex (using nextExprOrderIndex). + // if in sourceValues[i], a's exprOrderIndex < b's exprOrderIndex, + // then make sure in result, a's new exprOrderIndex < b's new exprOrderIndex. + // so that their relative order can preserve. + List<Map.Entry<Expression, MinMaxValue>> minMaxValueList = minMaxValues.entrySet().stream() + .sorted((a, b) -> Integer.compare(a.getValue().exprOrderIndex, b.getValue().exprOrderIndex)) + .collect(Collectors.toList()); + for (Map.Entry<Expression, MinMaxValue> entry : minMaxValueList) { Expression expr = entry.getKey(); MinMaxValue value = result.get(expr); MinMaxValue otherValue = entry.getValue(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/RangeInference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/RangeInference.java index 74099df1123..a94444e1607 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/RangeInference.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/RangeInference.java @@ -25,10 +25,12 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.GreaterThan; import org.apache.doris.nereids.trees.expressions.GreaterThanEqual; import org.apache.doris.nereids.trees.expressions.InPredicate; +import org.apache.doris.nereids.trees.expressions.IsNull; import org.apache.doris.nereids.trees.expressions.LessThan; import org.apache.doris.nereids.trees.expressions.LessThanEqual; import org.apache.doris.nereids.trees.expressions.Or; import org.apache.doris.nereids.trees.expressions.literal.Literal; +import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.util.ExpressionUtils; @@ -130,10 +132,22 @@ public class RangeInference extends ExpressionVisitor<RangeInference.ValueDesc, Expression originExpr, List<Expression> predicates, BinaryOperator<ValueDesc> op, boolean isAnd) { + boolean convertIsNullToEmptyValue = isAnd && predicates.stream().anyMatch(expr -> expr instanceof NullLiteral); Multimap<Expression, ValueDesc> groupByReference = Multimaps.newListMultimap(new LinkedHashMap<>(), ArrayList::new); for (Expression predicate : predicates) { - ValueDesc valueDesc = predicate.accept(this, context); + // EmptyValue(a) = IsNull(a) and null, it doesn't equals to IsNull(a). + // Only the and expression contains at least a null literal in its conjunctions, + // then EmptyValue(a) can equivalent to IsNull(a). + // so for expression and(IsNull(a), IsNull(b), ..., null), a, b can convert to EmptyValue. + // What's more, if a is not nullable, then EmptyValue(a) always equals to IsNull(a), + // but we don't consider this case here, we should fold IsNull(a) to FALSE using other rule. + ValueDesc valueDesc = null; + if (convertIsNullToEmptyValue && predicate instanceof IsNull) { + valueDesc = new EmptyValue(context, ((IsNull) predicate).child(), predicate); + } else { + valueDesc = predicate.accept(this, context); + } List<ValueDesc> valueDescs = (List<ValueDesc>) groupByReference.get(valueDesc.reference); valueDescs.add(valueDesc); } @@ -461,6 +475,11 @@ public class RangeInference extends ExpressionVisitor<RangeInference.ValueDesc, @Override public ValueDesc union(ValueDesc other) { + // for RangeValue/DiscreteValue/UnknownValue, when union with EmptyValue, + // call EmptyValue.union(this) => this + if (other instanceof EmptyValue) { + return other.union(this); + } Expression originExpr = FoldConstantRuleOnFE.evaluate( ExpressionUtils.or(toExpr, other.toExpr), context); return new UnknownValue(context, originExpr, @@ -469,6 +488,11 @@ public class RangeInference extends ExpressionVisitor<RangeInference.ValueDesc, @Override public ValueDesc intersect(ValueDesc other) { + // for RangeValue/DiscreteValue/UnknownValue, when intersect with EmptyValue, + // call EmptyValue.intersect(this) => EmptyValue + if (other instanceof EmptyValue) { + return other.intersect(this); + } Expression originExpr = FoldConstantRuleOnFE.evaluate( ExpressionUtils.and(toExpr, other.toExpr), context); return new UnknownValue(context, originExpr, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java index 628d94d1dcc..4666342943a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java @@ -25,22 +25,16 @@ import org.apache.doris.nereids.rules.expression.rules.RangeInference.EmptyValue import org.apache.doris.nereids.rules.expression.rules.RangeInference.RangeValue; import org.apache.doris.nereids.rules.expression.rules.RangeInference.UnknownValue; import org.apache.doris.nereids.rules.expression.rules.RangeInference.ValueDesc; -import org.apache.doris.nereids.trees.expressions.And; import org.apache.doris.nereids.trees.expressions.CompoundPredicate; import org.apache.doris.nereids.trees.expressions.EqualTo; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.GreaterThan; import org.apache.doris.nereids.trees.expressions.GreaterThanEqual; import org.apache.doris.nereids.trees.expressions.InPredicate; -import org.apache.doris.nereids.trees.expressions.IsNull; import org.apache.doris.nereids.trees.expressions.LessThan; import org.apache.doris.nereids.trees.expressions.LessThanEqual; -import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.Or; -import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral; import org.apache.doris.nereids.trees.expressions.literal.Literal; -import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; -import org.apache.doris.nereids.types.BooleanType; import org.apache.doris.nereids.util.ExpressionUtils; import com.google.common.collect.BoundType; @@ -52,7 +46,6 @@ import org.apache.commons.lang3.NotImplementedException; import java.util.Iterator; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; /** * This class implements the function to simplify expression range. @@ -108,11 +101,7 @@ public class SimplifyRange implements ExpressionPatternRuleFactory { private Expression getExpression(EmptyValue value) { Expression reference = value.getReference(); - if (reference.nullable()) { - return new And(new IsNull(reference), new NullLiteral(BooleanType.INSTANCE)); - } else { - return BooleanLiteral.FALSE; - } + return ExpressionUtils.falseOrNull(reference); } private Expression getExpression(RangeValue value) { @@ -136,11 +125,7 @@ public class SimplifyRange implements ExpressionPatternRuleFactory { if (!result.isEmpty()) { return ExpressionUtils.and(result); } else { - if (reference.nullable()) { - return new Or(new Not(new IsNull(reference)), new NullLiteral(BooleanType.INSTANCE)); - } else { - return BooleanLiteral.TRUE; - } + return ExpressionUtils.trueOrNull(reference); } } @@ -167,8 +152,15 @@ public class SimplifyRange implements ExpressionPatternRuleFactory { if (sourceValues.isEmpty()) { return originExpr; } - List<Expression> sourceExprs = sourceValues.stream().map(sourceValue -> getExpression(sourceValue)) - .collect(Collectors.toList()); + List<Expression> sourceExprs = Lists.newArrayListWithExpectedSize(sourceValues.size()); + for (ValueDesc sourceValue : sourceValues) { + Expression expr = getExpression(sourceValue); + if (value.isAnd()) { + sourceExprs.addAll(ExpressionUtils.extractConjunction(expr)); + } else { + sourceExprs.addAll(ExpressionUtils.extractDisjunction(expr)); + } + } Expression result = value.isAnd() ? ExpressionUtils.and(sourceExprs) : ExpressionUtils.or(sourceExprs); result = FoldConstantRuleOnFE.evaluate(result, value.getExpressionRewriteContext()); // ATTN: we must return original expr, because OrToIn is implemented with MutableState, diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java index 296cae5fe40..0fcc8043944 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java @@ -277,7 +277,7 @@ class ExpressionRewriteTest extends ExpressionRewriteTestHelper { } @Test - void testOrAddMinMax() { + void testAddMinMax() { executor = new ExpressionRuleExecutor(ImmutableList.of( bottomUp( AddMinMax.INSTANCE @@ -336,4 +336,44 @@ class ExpressionRewriteTest extends ExpressionRewriteTestHelper { "(AA in (timestamp '2024-01-01 02:00:00',timestamp '2024-01-02 02:00:00',timestamp '2024-01-03 02:00:00') or AA < timestamp '2024-01-01 01:00:00' ) and AA <= timestamp '2024-01-03 02:00:00'"); } + + @Test + void testSimplifyRangeAndAddMinMax() { + executor = new ExpressionRuleExecutor(ImmutableList.of( + bottomUp( + SimplifyRange.INSTANCE, + AddMinMax.INSTANCE + ) + )); + + assertRewriteAfterTypeCoercion("ISNULL(TA)", "ISNULL(TA)"); + assertRewriteAfterTypeCoercion("ISNULL(TA) and null", "ISNULL(TA) and null"); + assertRewriteAfterTypeCoercion("ISNULL(TA) and ISNULL(TA)", "ISNULL(TA)"); + assertRewriteAfterTypeCoercion("ISNULL(TA) or ISNULL(TA)", "ISNULL(TA)"); + assertRewriteAfterTypeCoercion("ISNULL(TA) and TA between 20 and 10", "ISNULL(TA) and null"); + // assertRewriteAfterTypeCoercion("ISNULL(TA) and TA > 10", "ISNULL(TA) and null"); // should be, but not support now + assertRewriteAfterTypeCoercion("ISNULL(TA) and TA > 10 and null", "ISNULL(TA) and null"); + assertRewriteAfterTypeCoercion("ISNULL(TA) or TA > 10", "ISNULL(TA) or TA > 10"); + // assertRewriteAfterTypeCoercion("(TA < 30 or TA > 40) and TA between 20 and 10", "TA IS NULL AND NULL"); // should be, but not support because flatten and + assertRewriteAfterTypeCoercion("(TA < 30 or TA > 40) and TA is null and null", "TA IS NULL AND NULL"); + assertRewriteAfterTypeCoercion("(TA < 30 or TA > 40) or TA between 20 and 10", "TA < 30 or TA > 40"); + + assertRewriteAfterTypeCoercion("TA between 10 and 20 or TA between 30 and 40 or TA between 60 and 50", + "(TA <= 20 or TA >= 30) and TA >= 10 and TA <= 40"); + // should be, but not support yet, because 'TA is null and null' => UnknownValue(EmptyValue(TA) and null) + //assertRewriteAfterTypeCoercion("TA between 10 and 20 or TA between 30 and 40 or TA is null and null", + // "(TA <= 20 or TA >= 30) and TA >= 10 and TA <= 40"); + assertRewriteAfterTypeCoercion("TA between 10 and 20 or TA between 30 and 40 or TA is null and null", + "(TA <= 20 or TA >= 30 or TA is null and null) and TA >= 10 and TA <= 40"); + assertRewriteAfterTypeCoercion("TA between 10 and 20 or TA between 30 and 40 or TA is null", + "TA >= 10 and TA <= 20 or TA >= 30 and TA <= 40 or TA is null"); + assertRewriteAfterTypeCoercion("ISNULL(TB) and (TA between 10 and 20 or TA between 30 and 40 or TA between 60 and 50)", + "ISNULL(TB) and ((TA <= 20 or TA >= 30) and TA >= 10 and TA <= 40)"); + assertRewriteAfterTypeCoercion("ISNULL(TB) and (TA between 10 and 20 or TA between 30 and 40 or TA is null)", + "ISNULL(TB) and (TA >= 10 and TA <= 20 or TA >= 30 and TA <= 40 or TA is null)"); + assertRewriteAfterTypeCoercion("TB between 20 and 10 and (TA between 10 and 20 or TA between 30 and 40 or TA between 60 and 50)", + "TB IS NULL AND NULL and (TA <= 20 or TA >= 30) and TA >= 10 and TA <= 40"); + assertRewriteAfterTypeCoercion("TA between 10 and 20 and TB between 10 and 20 or TA between 30 and 40 and TB between 30 and 40 or TA between 60 and 50 and TB between 60 and 50", + "(TA <= 20 and TB <= 20 or TA >= 30 and TB >= 30 or TA is null and null and TB is null) and TA >= 10 and TA <= 40 and TB >= 10 and TB <= 40"); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org