This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6b5cef3ea51 [opt](nereids) range inference convert isnull to 
EmptyValue  (#45310)
6b5cef3ea51 is described below

commit 6b5cef3ea5105f2ff85bb7bb0b711b305dfd54b2
Author: yujun <yu...@selectdb.com>
AuthorDate: Mon Dec 16 18:29:17 2024 +0800

    [opt](nereids) range inference convert isnull to EmptyValue  (#45310)
    
    Problem Summary:
    
    for sql
    
    a > 10 and a < 20 or a > 30 and a < 40 or  a > 60 and a < 50
    
    AddMinMax should opt it as:
    
    (a < 20 or a > 30 or a is null and null) and a > 10 and a < 40
    
    But it fail, because the SimplifyRange will opt it as
    
    a > 10 and a < 20 or a > 30 and a < 40 or  a is null and null
    
    since the 3rd is `a is null and null`, it couldn't get a's range and
    suppose a's range is [-00, +00], then the whole expression, it will get
    a's range is [-00, +00], to fix this problem, RangeInference need to
    convert `a is null and null` to EmptyValue instead of UnknownValue.
---
 .../nereids/rules/expression/rules/AddMinMax.java  | 12 ++++++-
 .../rules/expression/rules/RangeInference.java     | 26 +++++++++++++-
 .../rules/expression/rules/SimplifyRange.java      | 30 ++++++----------
 .../rules/expression/ExpressionRewriteTest.java    | 42 +++++++++++++++++++++-
 4 files changed, 88 insertions(+), 22 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/AddMinMax.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/AddMinMax.java
index 639616244ec..c1efb82bac6 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/AddMinMax.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/AddMinMax.java
@@ -326,7 +326,17 @@ public class AddMinMax implements 
ExpressionPatternRuleFactory {
         for (int i = 1; i < sourceValues.size(); i++) {
             // process in sourceValues[i]
             Map<Expression, MinMaxValue> minMaxValues = 
getExprMinMaxValues(sourceValues.get(i));
-            for (Map.Entry<Expression, MinMaxValue> entry : 
minMaxValues.entrySet()) {
+            // merge values of sourceValues[i] into result.
+            // also keep the value's relative order in sourceValues[i].
+            // for example, if a and b in sourceValues[i], but not in result, 
then during merging,
+            // a and b will assign a new exprOrderIndex (using 
nextExprOrderIndex).
+            // if in sourceValues[i], a's exprOrderIndex < b's exprOrderIndex,
+            // then make sure in result, a's new exprOrderIndex < b's new 
exprOrderIndex.
+            // so that their relative order can preserve.
+            List<Map.Entry<Expression, MinMaxValue>> minMaxValueList = 
minMaxValues.entrySet().stream()
+                    .sorted((a, b) -> 
Integer.compare(a.getValue().exprOrderIndex, b.getValue().exprOrderIndex))
+                    .collect(Collectors.toList());
+            for (Map.Entry<Expression, MinMaxValue> entry : minMaxValueList) {
                 Expression expr = entry.getKey();
                 MinMaxValue value = result.get(expr);
                 MinMaxValue otherValue = entry.getValue();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/RangeInference.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/RangeInference.java
index 74099df1123..a94444e1607 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/RangeInference.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/RangeInference.java
@@ -25,10 +25,12 @@ import 
org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.GreaterThan;
 import org.apache.doris.nereids.trees.expressions.GreaterThanEqual;
 import org.apache.doris.nereids.trees.expressions.InPredicate;
+import org.apache.doris.nereids.trees.expressions.IsNull;
 import org.apache.doris.nereids.trees.expressions.LessThan;
 import org.apache.doris.nereids.trees.expressions.LessThanEqual;
 import org.apache.doris.nereids.trees.expressions.Or;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
+import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.util.ExpressionUtils;
 
@@ -130,10 +132,22 @@ public class RangeInference extends 
ExpressionVisitor<RangeInference.ValueDesc,
             Expression originExpr, List<Expression> predicates,
             BinaryOperator<ValueDesc> op, boolean isAnd) {
 
+        boolean convertIsNullToEmptyValue = isAnd && 
predicates.stream().anyMatch(expr -> expr instanceof NullLiteral);
         Multimap<Expression, ValueDesc> groupByReference
                 = Multimaps.newListMultimap(new LinkedHashMap<>(), 
ArrayList::new);
         for (Expression predicate : predicates) {
-            ValueDesc valueDesc = predicate.accept(this, context);
+            // EmptyValue(a) = IsNull(a) and null,  it doesn't equals to 
IsNull(a).
+            // Only the and expression contains at least a null literal in its 
conjunctions,
+            // then EmptyValue(a) can equivalent to IsNull(a).
+            // so for expression and(IsNull(a), IsNull(b), ..., null), a, b 
can convert to EmptyValue.
+            // What's more, if a is not nullable, then EmptyValue(a) always 
equals to IsNull(a),
+            // but we don't consider this case here, we should fold IsNull(a) 
to FALSE using other rule.
+            ValueDesc valueDesc = null;
+            if (convertIsNullToEmptyValue && predicate instanceof IsNull) {
+                valueDesc = new EmptyValue(context, ((IsNull) 
predicate).child(), predicate);
+            } else {
+                valueDesc = predicate.accept(this, context);
+            }
             List<ValueDesc> valueDescs = (List<ValueDesc>) 
groupByReference.get(valueDesc.reference);
             valueDescs.add(valueDesc);
         }
@@ -461,6 +475,11 @@ public class RangeInference extends 
ExpressionVisitor<RangeInference.ValueDesc,
 
         @Override
         public ValueDesc union(ValueDesc other) {
+            // for RangeValue/DiscreteValue/UnknownValue, when union with 
EmptyValue,
+            // call EmptyValue.union(this) => this
+            if (other instanceof EmptyValue) {
+                return other.union(this);
+            }
             Expression originExpr = FoldConstantRuleOnFE.evaluate(
                     ExpressionUtils.or(toExpr, other.toExpr), context);
             return new UnknownValue(context, originExpr,
@@ -469,6 +488,11 @@ public class RangeInference extends 
ExpressionVisitor<RangeInference.ValueDesc,
 
         @Override
         public ValueDesc intersect(ValueDesc other) {
+            // for RangeValue/DiscreteValue/UnknownValue, when intersect with 
EmptyValue,
+            // call EmptyValue.intersect(this) => EmptyValue
+            if (other instanceof EmptyValue) {
+                return other.intersect(this);
+            }
             Expression originExpr = FoldConstantRuleOnFE.evaluate(
                     ExpressionUtils.and(toExpr, other.toExpr), context);
             return new UnknownValue(context, originExpr,
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
index 628d94d1dcc..4666342943a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
@@ -25,22 +25,16 @@ import 
org.apache.doris.nereids.rules.expression.rules.RangeInference.EmptyValue
 import 
org.apache.doris.nereids.rules.expression.rules.RangeInference.RangeValue;
 import 
org.apache.doris.nereids.rules.expression.rules.RangeInference.UnknownValue;
 import 
org.apache.doris.nereids.rules.expression.rules.RangeInference.ValueDesc;
-import org.apache.doris.nereids.trees.expressions.And;
 import org.apache.doris.nereids.trees.expressions.CompoundPredicate;
 import org.apache.doris.nereids.trees.expressions.EqualTo;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.GreaterThan;
 import org.apache.doris.nereids.trees.expressions.GreaterThanEqual;
 import org.apache.doris.nereids.trees.expressions.InPredicate;
-import org.apache.doris.nereids.trees.expressions.IsNull;
 import org.apache.doris.nereids.trees.expressions.LessThan;
 import org.apache.doris.nereids.trees.expressions.LessThanEqual;
-import org.apache.doris.nereids.trees.expressions.Not;
 import org.apache.doris.nereids.trees.expressions.Or;
-import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
-import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
-import org.apache.doris.nereids.types.BooleanType;
 import org.apache.doris.nereids.util.ExpressionUtils;
 
 import com.google.common.collect.BoundType;
@@ -52,7 +46,6 @@ import org.apache.commons.lang3.NotImplementedException;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
-import java.util.stream.Collectors;
 
 /**
  * This class implements the function to simplify expression range.
@@ -108,11 +101,7 @@ public class SimplifyRange implements 
ExpressionPatternRuleFactory {
 
     private Expression getExpression(EmptyValue value) {
         Expression reference = value.getReference();
-        if (reference.nullable()) {
-            return new And(new IsNull(reference), new 
NullLiteral(BooleanType.INSTANCE));
-        } else {
-            return BooleanLiteral.FALSE;
-        }
+        return ExpressionUtils.falseOrNull(reference);
     }
 
     private Expression getExpression(RangeValue value) {
@@ -136,11 +125,7 @@ public class SimplifyRange implements 
ExpressionPatternRuleFactory {
         if (!result.isEmpty()) {
             return ExpressionUtils.and(result);
         } else {
-            if (reference.nullable()) {
-                return new Or(new Not(new IsNull(reference)), new 
NullLiteral(BooleanType.INSTANCE));
-            } else {
-                return BooleanLiteral.TRUE;
-            }
+            return ExpressionUtils.trueOrNull(reference);
         }
     }
 
@@ -167,8 +152,15 @@ public class SimplifyRange implements 
ExpressionPatternRuleFactory {
         if (sourceValues.isEmpty()) {
             return originExpr;
         }
-        List<Expression> sourceExprs = sourceValues.stream().map(sourceValue 
-> getExpression(sourceValue))
-                .collect(Collectors.toList());
+        List<Expression> sourceExprs = 
Lists.newArrayListWithExpectedSize(sourceValues.size());
+        for (ValueDesc sourceValue : sourceValues) {
+            Expression expr = getExpression(sourceValue);
+            if (value.isAnd()) {
+                sourceExprs.addAll(ExpressionUtils.extractConjunction(expr));
+            } else {
+                sourceExprs.addAll(ExpressionUtils.extractDisjunction(expr));
+            }
+        }
         Expression result = value.isAnd() ? ExpressionUtils.and(sourceExprs) : 
ExpressionUtils.or(sourceExprs);
         result = FoldConstantRuleOnFE.evaluate(result, 
value.getExpressionRewriteContext());
         // ATTN: we must return original expr, because OrToIn is implemented 
with MutableState,
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java
index 296cae5fe40..0fcc8043944 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/ExpressionRewriteTest.java
@@ -277,7 +277,7 @@ class ExpressionRewriteTest extends 
ExpressionRewriteTestHelper {
     }
 
     @Test
-    void testOrAddMinMax() {
+    void testAddMinMax() {
         executor = new ExpressionRuleExecutor(ImmutableList.of(
             bottomUp(
                 AddMinMax.INSTANCE
@@ -336,4 +336,44 @@ class ExpressionRewriteTest extends 
ExpressionRewriteTestHelper {
                 "(AA in (timestamp '2024-01-01 02:00:00',timestamp '2024-01-02 
02:00:00',timestamp '2024-01-03 02:00:00') or AA < timestamp '2024-01-01 
01:00:00' ) and AA <= timestamp '2024-01-03 02:00:00'");
 
     }
+
+    @Test
+    void testSimplifyRangeAndAddMinMax() {
+        executor = new ExpressionRuleExecutor(ImmutableList.of(
+                bottomUp(
+                        SimplifyRange.INSTANCE,
+                        AddMinMax.INSTANCE
+                )
+        ));
+
+        assertRewriteAfterTypeCoercion("ISNULL(TA)", "ISNULL(TA)");
+        assertRewriteAfterTypeCoercion("ISNULL(TA) and null", "ISNULL(TA) and 
null");
+        assertRewriteAfterTypeCoercion("ISNULL(TA) and ISNULL(TA)", 
"ISNULL(TA)");
+        assertRewriteAfterTypeCoercion("ISNULL(TA) or ISNULL(TA)", 
"ISNULL(TA)");
+        assertRewriteAfterTypeCoercion("ISNULL(TA) and TA between 20 and 10", 
"ISNULL(TA) and null");
+        // assertRewriteAfterTypeCoercion("ISNULL(TA) and TA > 10", 
"ISNULL(TA) and null"); // should be, but not support now
+        assertRewriteAfterTypeCoercion("ISNULL(TA) and TA > 10 and null", 
"ISNULL(TA) and null");
+        assertRewriteAfterTypeCoercion("ISNULL(TA) or TA > 10", "ISNULL(TA) or 
TA > 10");
+        // assertRewriteAfterTypeCoercion("(TA < 30 or TA > 40) and TA between 
20 and 10", "TA IS NULL AND NULL"); // should be, but not support because 
flatten and
+        assertRewriteAfterTypeCoercion("(TA < 30 or TA > 40) and TA is null 
and null", "TA IS NULL AND NULL");
+        assertRewriteAfterTypeCoercion("(TA < 30 or TA > 40) or TA between 20 
and 10", "TA < 30 or TA > 40");
+
+        assertRewriteAfterTypeCoercion("TA between 10 and 20 or TA between 30 
and 40 or TA between 60 and 50",
+                "(TA <= 20 or TA >= 30) and TA >= 10 and TA <= 40");
+        // should be, but not support yet, because 'TA is null and null' => 
UnknownValue(EmptyValue(TA) and null)
+        //assertRewriteAfterTypeCoercion("TA between 10 and 20 or TA between 
30 and 40 or TA is null and null",
+        //        "(TA <= 20 or TA >= 30) and TA >= 10 and TA <= 40");
+        assertRewriteAfterTypeCoercion("TA between 10 and 20 or TA between 30 
and 40 or TA is null and null",
+                "(TA <= 20 or TA >= 30 or TA is null and null) and TA >= 10 
and TA <= 40");
+        assertRewriteAfterTypeCoercion("TA between 10 and 20 or TA between 30 
and 40 or TA is null",
+                "TA >= 10 and TA <= 20 or TA >= 30 and TA <= 40 or TA is 
null");
+        assertRewriteAfterTypeCoercion("ISNULL(TB) and (TA between 10 and 20 
or TA between 30 and 40 or TA between 60 and 50)",
+                "ISNULL(TB) and ((TA <= 20 or TA >= 30) and TA >= 10 and TA <= 
40)");
+        assertRewriteAfterTypeCoercion("ISNULL(TB) and (TA between 10 and 20 
or TA between 30 and 40 or TA is null)",
+                "ISNULL(TB) and (TA >= 10 and TA <= 20 or TA >= 30 and TA <= 
40 or TA is null)");
+        assertRewriteAfterTypeCoercion("TB between 20 and 10 and (TA between 
10 and 20 or TA between 30 and 40 or TA between 60 and 50)",
+                "TB IS NULL AND NULL and (TA <= 20 or TA >= 30) and TA >= 10 
and TA <= 40");
+        assertRewriteAfterTypeCoercion("TA between 10 and 20 and TB between 10 
and 20 or TA between 30 and 40 and TB between 30 and 40 or TA between 60 and 50 
and TB between 60 and 50",
+                "(TA <= 20 and TB <= 20 or TA >= 30 and TB >= 30 or TA is null 
and null and TB is null) and TA >= 10 and TA <= 40 and TB >= 10 and TB <= 40");
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to