This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new a7f3c219834 branch-2.1: [opt](nereids) opt range inference for or
expression when out of order #46303 (#53706)
a7f3c219834 is described below
commit a7f3c219834f86675a852ced39f602140e81d4a3
Author: seawinde <[email protected]>
AuthorDate: Fri Jul 25 16:49:06 2025 +0800
branch-2.1: [opt](nereids) opt range inference for or expression when out
of order #46303 (#53706)
picked from part of #46303
---
.../rules/expression/rules/SimplifyRange.java | 44 +++++++++++++++++++---
.../rules/expression/SimplifyRangeTest.java | 40 +++++++++++++-------
2 files changed, 65 insertions(+), 19 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
index d5fd8e24783..434f7a6f5bd 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyRange.java
@@ -46,7 +46,9 @@ import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Range;
+import com.google.common.collect.RangeSet;
import com.google.common.collect.Sets;
+import com.google.common.collect.TreeRangeSet;
import java.util.ArrayList;
import java.util.Collection;
@@ -157,18 +159,18 @@ public class SimplifyRange implements
ExpressionPatternRuleFactory {
@Override
public ValueDesc visitAnd(And and, ExpressionRewriteContext context) {
return simplify(context, and,
ExpressionUtils.extractConjunction(and),
- ValueDesc::intersect, ExpressionUtils::and);
+ ValueDesc::intersect, ExpressionUtils::and, true);
}
@Override
public ValueDesc visitOr(Or or, ExpressionRewriteContext context) {
return simplify(context, or,
ExpressionUtils.extractDisjunction(or),
- ValueDesc::union, ExpressionUtils::or);
+ ValueDesc::union, ExpressionUtils::or, false);
}
private ValueDesc simplify(ExpressionRewriteContext context,
Expression originExpr, List<Expression> predicates,
- BinaryOperator<ValueDesc> op, BinaryOperator<Expression>
exprOp) {
+ BinaryOperator<ValueDesc> op, BinaryOperator<Expression>
exprOp, boolean isAnd) {
Multimap<Expression, ValueDesc> groupByReference
= Multimaps.newListMultimap(new LinkedHashMap<>(),
ArrayList::new);
@@ -181,7 +183,9 @@ public class SimplifyRange implements
ExpressionPatternRuleFactory {
List<ValueDesc> valuePerRefs = Lists.newArrayList();
for (Entry<Expression, Collection<ValueDesc>> referenceValues :
groupByReference.asMap().entrySet()) {
List<ValueDesc> valuePerReference = (List)
referenceValues.getValue();
-
+ if (!isAnd) {
+ valuePerReference = unionDiscreteAndRange(context,
referenceValues.getKey(), valuePerReference);
+ }
// merge per reference
ValueDesc simplifiedValue = valuePerReference.get(0);
for (int i = 1; i < valuePerReference.size(); i++) {
@@ -200,6 +204,30 @@ public class SimplifyRange implements
ExpressionPatternRuleFactory {
}
}
+ /** Coalesces only the RangeValue entries of one reference; every other kind of value desc is passed through unmerged. The returned list holds the pass-through descs first, followed by one RangeValue per merged range. */
+ public static List<ValueDesc>
unionDiscreteAndRange(ExpressionRewriteContext context,
+ Expression reference, List<ValueDesc> valueDescs) {
+ List<ValueDesc> result =
Lists.newArrayListWithExpectedSize(valueDescs.size());
+
+ // for (a >= 8 and a < 9) or (a >=12 and a < 13) or (a >=13 and a <
14) can convert to
+ // (a >= 8 and a < 9) or (a >=12 and a < 14)
+ RangeSet<Literal> rangeSet = TreeRangeSet.create();
+ for (ValueDesc valueDesc : valueDescs) {
+ if (valueDesc instanceof RangeValue) {
+ Range<Literal> range = ((RangeValue) valueDesc).range;
+ rangeSet.add(range); // the range set coalesces connected ranges, so input order does not matter
+ } else {
+ result.add(valueDesc); // not a range: kept as-is, ahead of the merged ranges
+ }
+ }
+ for (Range<Literal> range : rangeSet.asRanges()) { // one new RangeValue per coalesced range
+ RangeValue rangeValue = new RangeValue(context, reference,
RangeValue.toExpression(range, reference));
+ rangeValue.range = range; // assigned explicitly; presumably the constructor does not derive it - TODO confirm
+ result.add(rangeValue);
+ }
+ return result;
+ }
+
private abstract static class ValueDesc {
ExpressionRewriteContext context;
Expression toExpr;
@@ -357,7 +385,9 @@ public class SimplifyRange implements
ExpressionPatternRuleFactory {
if (range.isConnected(o.range)) {
RangeValue rangeValue = new RangeValue(context, reference,
originExpr);
rangeValue.range = range.intersection(o.range);
- return rangeValue;
+ if (!rangeValue.range.isEmpty()) {
+ return rangeValue;
+ }
}
return new EmptyValue(context, reference, originExpr);
}
@@ -372,6 +402,10 @@ public class SimplifyRange implements
ExpressionPatternRuleFactory {
@Override
public Expression toExpression() {
+ return toExpression(this.range, this.reference);
+ }
+
+ public static Expression toExpression(Range<Literal> range, Expression
reference) {
List<Expression> result = Lists.newArrayList();
if (range.hasLowerBound()) {
if (range.lowerBoundType() == BoundType.CLOSED) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyRangeTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyRangeTest.java
index 79906880f53..ca2cab9905b 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyRangeTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyRangeTest.java
@@ -65,6 +65,8 @@ public class SimplifyRangeTest extends ExpressionRewrite {
executor = new ExpressionRuleExecutor(ImmutableList.of(
bottomUp(SimplifyRange.INSTANCE)
));
+ assertRewrite("(TA >= 8 and TA < 8) or (TA >= 8 and TA < 8)", "TA is
null and null");
+ assertRewrite("(TA >=12 and TA < 13) or (TA >= 15 and TA < 16) or (TA
>= 16 and TA < 17)", "(TA >=12 and TA < 13) or (TA >=15 and TA < 17)");
assertRewrite("TA", "TA");
assertRewrite("TA > 3 or TA > null", "TA > 3 OR NULL");
assertRewrite("TA > 3 or TA < null", "TA > 3 OR NULL");
@@ -85,16 +87,16 @@ public class SimplifyRangeTest extends ExpressionRewrite {
assertRewrite("(TA > 3 and TA < 1) or (TA > 7 and TA < 5)", "TA is
null and null");
assertRewriteNotNull("TA > 3 and TA < 1", "FALSE");
assertRewrite("TA > 3 and TA < 1", "TA is null and null");
- assertRewrite("TA >= 3 and TA < 3", "TA >= 3 and TA < 3");
+ assertRewrite("TA >= 3 and TA < 3", "TA is null and null");
assertRewriteNotNull("TA = 1 and TA > 10", "FALSE");
assertRewrite("TA = 1 and TA > 10", "TA is null and null");
- assertRewrite("TA > 5 or TA < 1", "TA > 5 or TA < 1");
+ assertRewrite("TA > 5 or TA < 1", "TA < 1 or TA > 5");
assertRewrite("TA > 5 or TA > 1 or TA > 10", "TA > 1");
assertRewrite("TA > 5 or TA > 1 or TA < 10", "TA is not null or null");
assertRewriteNotNull("TA > 5 or TA > 1 or TA < 10", "TRUE");
assertRewrite("TA > 5 and TA > 1 and TA > 10", "TA > 10");
assertRewrite("TA > 5 and TA > 1 and TA < 10", "TA > 5 and TA < 10");
- assertRewrite("TA > 1 or TA < 1", "TA > 1 or TA < 1");
+ assertRewrite("TA > 1 or TA < 1", "TA < 1 or TA > 1");
assertRewrite("TA > 1 or TA < 10", "TA is not null or null");
assertRewriteNotNull("TA > 1 or TA < 10", "TRUE");
assertRewrite("TA > 5 and TA < 10", "TA > 5 and TA < 10");
@@ -109,7 +111,7 @@ public class SimplifyRangeTest extends ExpressionRewrite {
assertRewrite("(TA > 10 or TA > 20) and (TB > 10 and TB > 20)", "TA >
10 and TB > 20");
assertRewrite("((TB > 30 and TA > 40) and TA > 20) and (TB > 10 and TB
> 20)", "TB > 30 and TA > 40");
assertRewrite("(TA > 10 and TB > 10) or (TB > 10 and TB > 20)", "TA >
10 and TB > 10 or TB > 20");
- assertRewrite("((TA > 10 or TA > 5) and TB > 10) or (TB > 10 and (TB >
20 or TB < 10))", "(TA > 5 and TB > 10) or (TB > 10 and (TB > 20 or TB < 10))");
+ assertRewrite("((TA > 10 or TA > 5) and TB > 10) or (TB > 10 and (TB >
20 or TB < 10))", "(TA > 5 and TB > 10) or (TB > 10 and (TB < 10 or TB > 20))");
assertRewriteNotNull("TA in (1,2,3) and TA > 10", "FALSE");
assertRewrite("TA in (1,2,3) and TA > 10", "TA is null and null");
assertRewrite("TA in (1,2,3) and TA >= 1", "TA in (1,2,3)");
@@ -147,15 +149,15 @@ public class SimplifyRangeTest extends ExpressionRewrite {
assertRewrite("(TA + TC > 3 and TA + TC < 1) or (TA + TC > 7 and TA +
TC < 5)", "(TA + TC) is null and null");
assertRewriteNotNull("TA + TC > 3 and TA + TC < 1", "FALSE");
assertRewrite("TA + TC > 3 and TA + TC < 1", "(TA + TC) is null and
null");
- assertRewrite("TA + TC >= 3 and TA + TC < 3", "TA + TC >= 3 and TA +
TC < 3");
+ assertRewrite("TA + TC >= 3 and TA + TC < 3", "TA + TC is null and
null");
assertRewriteNotNull("TA + TC = 1 and TA + TC > 10", "FALSE");
assertRewrite("TA + TC = 1 and TA + TC > 10", "(TA + TC) is null and
null");
- assertRewrite("TA + TC > 5 or TA + TC < 1", "TA + TC > 5 or TA + TC <
1");
+ assertRewrite("TA + TC > 5 or TA + TC < 1", "TA + TC < 1 or TA + TC >
5");
assertRewrite("TA + TC > 5 or TA + TC > 1 or TA + TC > 10", "TA + TC >
1");
assertRewrite("TA + TC > 5 or TA + TC > 1 or TA + TC < 10", "(TA + TC)
is not null or null");
assertRewrite("TA + TC > 5 and TA + TC > 1 and TA + TC > 10", "TA + TC
> 10");
assertRewrite("TA + TC > 5 and TA + TC > 1 and TA + TC < 10", "TA + TC
> 5 and TA + TC < 10");
- assertRewrite("TA + TC > 1 or TA + TC < 1", "TA + TC > 1 or TA + TC <
1");
+ assertRewrite("TA + TC > 1 or TA + TC < 1", "TA + TC < 1 or TA + TC >
1");
assertRewrite("TA + TC > 1 or TA + TC < 10", "(TA + TC) is not null or
null");
assertRewrite("TA + TC > 5 and TA + TC < 10", "TA + TC > 5 and TA + TC
< 10");
assertRewrite("TA + TC > 5 and TA + TC > 10", "TA + TC > 10");
@@ -168,7 +170,7 @@ public class SimplifyRangeTest extends ExpressionRewrite {
assertRewrite("(TA + TC > 10 or TA + TC > 20) and (TB > 10 and TB >
20)", "TA + TC > 10 and TB > 20");
assertRewrite("((TB > 30 and TA + TC > 40) and TA + TC > 20) and (TB >
10 and TB > 20)", "TB > 30 and TA + TC > 40");
assertRewrite("(TA + TC > 10 and TB > 10) or (TB > 10 and TB > 20)",
"TA + TC > 10 and TB > 10 or TB > 20");
- assertRewrite("((TA + TC > 10 or TA + TC > 5) and TB > 10) or (TB > 10
and (TB > 20 or TB < 10))", "(TA + TC > 5 and TB > 10) or (TB > 10 and (TB > 20
or TB < 10))");
+ assertRewrite("((TA + TC > 10 or TA + TC > 5) and TB > 10) or (TB > 10
and (TB > 20 or TB < 10))", "(TA + TC > 5 and TB > 10) or (TB > 10 and (TB < 10
or TB > 20))");
assertRewriteNotNull("TA + TC in (1,2,3) and TA + TC > 10", "FALSE");
assertRewrite("TA + TC in (1,2,3) and TA + TC > 10", "(TA + TC) is
null and null");
assertRewrite("TA + TC in (1,2,3) and TA + TC >= 1", "TA + TC in
(1,2,3)");
@@ -204,6 +206,9 @@ public class SimplifyRangeTest extends ExpressionRewrite {
executor = new ExpressionRuleExecutor(ImmutableList.of(
bottomUp(SimplifyRange.INSTANCE)
));
+ assertRewrite(
+ "(AA >= date '2024-01-01' and AA < date '2024-01-02') or (AA
>= date '2024-01-05' and AA < date '2024-01-06') or (AA >= date '2024-01-06'
and AA < date '2024-01-07')",
+ "(AA >= date '2024-01-01' and AA < date '2024-01-02') or (AA
>= date '2024-01-05' and AA < date '2024-01-07')");
assertRewrite("AA", "AA");
assertRewrite(
"(AA >= date '2024-01-01' and AA <= date '2024-01-03') or (AA
> date '2024-01-05' and AA < date '2024-01-07')",
@@ -217,11 +222,13 @@ public class SimplifyRangeTest extends ExpressionRewrite {
assertRewriteNotNull("AA > date '2024-01-03' and AA < date
'2024-01-01'", "FALSE");
assertRewrite("AA > date '2024-01-03' and AA < date '2024-01-01'", "AA
is null and null");
assertRewrite("AA >= date '2024-01-01' and AA < date '2024-01-01'",
- "AA >= date '2024-01-01' and AA < date '2024-01-01'");
+ "AA is null and null");
+ assertRewrite("(AA >= date '2024-01-01' and AA < date '2024-01-01') or
(AA >= date '2024-01-01' and AA < date '2024-01-01')",
+ "AA is null and null");
assertRewriteNotNull("AA = date '2024-01-01' and AA > date
'2024-01-10'", "FALSE");
assertRewrite("AA = date '2024-01-01' and AA > date '2024-01-10'", "AA
is null and null");
assertRewrite("AA > date '2024-01-05' or AA < date '2024-01-01'",
- "AA > date '2024-01-05' or AA < date '2024-01-01'");
+ "AA < date '2024-01-01' or AA > date '2024-01-05'");
assertRewrite("AA > date '2024-01-05' or AA > date '2024-01-01' or AA
> date '2024-01-10'",
"AA > date '2024-01-01'");
assertRewrite("AA > date '2024-01-05' or AA > date '2024-01-01' or AA
< date '2024-01-10'", "AA is not null or null");
@@ -231,7 +238,7 @@ public class SimplifyRangeTest extends ExpressionRewrite {
assertRewrite("AA > date '2024-01-05' and AA > date '2024-01-01' and
AA < date '2024-01-10'",
"AA > date '2024-01-05' and AA < date '2024-01-10'");
assertRewrite("AA > date '2024-01-05' or AA < date '2024-01-05'",
- "AA > date '2024-01-05' or AA < date '2024-01-05'");
+ "AA < date '2024-01-05' or AA > date '2024-01-05'");
assertRewrite("AA > date '2024-01-01' or AA < date '2024-01-10'", "AA
is not null or null");
assertRewriteNotNull("AA > date '2024-01-01' or AA < date
'2024-01-10'", "TRUE");
assertRewrite("AA > date '2024-01-05' and AA < date '2024-01-10'",
@@ -285,6 +292,9 @@ public class SimplifyRangeTest extends ExpressionRewrite {
bottomUp(SimplifyRange.INSTANCE)
));
assertRewrite("CA", "CA");
+ assertRewrite(
+ "(CA >= timestamp '2024-01-01 00:00:00' and CA < timestamp
'2024-01-02 00:00:00') or (CA >= timestamp '2024-01-05 00:00:00' and CA <
timestamp '2024-01-07 00:00:00') or (CA >= timestamp '2024-01-07 00:00:00' and
CA < timestamp '2024-01-08 00:00:00')",
+ "(CA >= timestamp '2024-01-01 00:00:00' and CA < timestamp
'2024-01-02 00:00:00') or (CA >= timestamp '2024-01-05 00:00:00' and CA <
timestamp '2024-01-08 00:00:00')");
assertRewrite(
"(CA >= timestamp '2024-01-01 00:00:00' and CA <= timestamp
'2024-01-03 00:00:00') or (CA > timestamp '2024-01-05 00:00:00' and CA <
timestamp '2024-01-07 00:00:00')",
"(CA >= timestamp '2024-01-01 00:00:00' and CA <= timestamp
'2024-01-03 00:00:00') or (CA > timestamp '2024-01-05 00:00:00' and CA <
timestamp '2024-01-07 00:00:00')");
@@ -297,11 +307,13 @@ public class SimplifyRangeTest extends ExpressionRewrite {
assertRewriteNotNull("CA > timestamp '2024-01-03 00:00:10' and CA <
timestamp '2024-01-01 01:00:00'", "FALSE");
assertRewrite("CA > timestamp '2024-01-03 00:00:10' and CA < timestamp
'2024-01-01 01:00:00'", "CA is null and null");
assertRewrite("CA >= timestamp '2024-01-01 00:00:10' and CA <
timestamp '2024-01-01 00:00:10'",
- "CA >= timestamp '2024-01-01 00:00:10' and CA < timestamp
'2024-01-01 00:00:10'");
+ "CA is null and null");
+ assertRewrite("(CA >= timestamp '2024-01-01 00:00:10' and CA <
timestamp '2024-01-01 00:00:10') or (CA >= timestamp '2024-01-01 00:00:10' and
CA < timestamp '2024-01-01 00:00:10')",
+ "CA is null and null");
assertRewriteNotNull("CA = timestamp '2024-01-01 10:00:10' and CA >
timestamp '2024-01-10 00:00:10'", "FALSE");
assertRewrite("CA = timestamp '2024-01-01 10:00:10' and CA > timestamp
'2024-01-10 00:00:10'", "CA is null and null");
assertRewrite("CA > timestamp '2024-01-05 00:00:10' or CA < timestamp
'2024-01-01 00:00:10'",
- "CA > timestamp '2024-01-05 00:00:10' or CA < timestamp
'2024-01-01 00:00:10'");
+ "CA < timestamp '2024-01-01 00:00:10' or CA > timestamp
'2024-01-05 00:00:10'");
assertRewrite("CA > timestamp '2024-01-05 00:00:10' or CA > timestamp
'2024-01-01 00:00:10' or CA > timestamp '2024-01-10 00:00:10'",
"CA > timestamp '2024-01-01 00:00:10'");
assertRewrite("CA > timestamp '2024-01-05 00:00:10' or CA > timestamp
'2024-01-01 00:00:10' or CA < timestamp '2024-01-10 00:00:10'", "CA is not null
or null");
@@ -311,7 +323,7 @@ public class SimplifyRangeTest extends ExpressionRewrite {
assertRewrite("CA > timestamp '2024-01-05 00:00:10' and CA > timestamp
'2024-01-01 00:00:10' and CA < timestamp '2024-01-10 00:00:10'",
"CA > timestamp '2024-01-05 00:00:10' and CA < timestamp
'2024-01-10 00:00:10'");
assertRewrite("CA > timestamp '2024-01-05 00:00:10' or CA < timestamp
'2024-01-05 00:00:10'",
- "CA > timestamp '2024-01-05 00:00:10' or CA < timestamp
'2024-01-05 00:00:10'");
+ "CA < timestamp '2024-01-05 00:00:10' or CA > timestamp
'2024-01-05 00:00:10'");
assertRewrite("CA > timestamp '2024-01-01 00:02:10' or CA < timestamp
'2024-01-10 00:02:10'", "CA is not null or null");
assertRewriteNotNull("CA > timestamp '2024-01-01 00:00:00' or CA <
timestamp '2024-01-10 00:00:00'", "TRUE");
assertRewrite("CA > timestamp '2024-01-05 01:00:00' and CA < timestamp
'2024-01-10 01:00:00'",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]