This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 0c1d9356d3f [Feat](nereids) support date function in partition prune 
(#38743)
0c1d9356d3f is described below

commit 0c1d9356d3fe4c2c215a78606abf8fa1856c3fc8
Author: feiniaofeiafei <53502832+feiniaofeia...@users.noreply.github.com>
AuthorDate: Fri Aug 2 20:13:28 2024 +0800

    [Feat](nereids) support date function in partition prune (#38743)
    
    This pr supports to compute date(dt)>10.
    For example, there is a partition ('2020-01-01 10:00:00', '2020-02-05 
10:00:00') ,
    partition column is dt, and a predicate date(dt)<'2019-01-01 ',
    this partition can be pruned.
---
 .../rules/OneRangePartitionEvaluator.java          |  38 +--
 .../trees/expressions/functions/scalar/Date.java   |  19 +-
 .../test_date_function_prune_mono.groovy           | 310 +++++++++++++++++++++
 3 files changed, 335 insertions(+), 32 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java
index 6d034431545..ac909a5b9fa 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/OneRangePartitionEvaluator.java
@@ -47,9 +47,9 @@ import 
org.apache.doris.nereids.trees.expressions.functions.scalar.Date;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.DateTrunc;
 import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
+import org.apache.doris.nereids.trees.expressions.literal.MaxLiteral;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.BooleanType;
-import org.apache.doris.nereids.types.DataType;
 import org.apache.doris.nereids.util.ExpressionUtils;
 import org.apache.doris.nereids.util.Utils;
 
@@ -645,36 +645,11 @@ public class OneRangePartitionEvaluator
         if (!(result.result instanceof Date)) {
             return result;
         }
-        date = (Date) result.result;
-        if (!(date.child() instanceof Slot) || !isPartitionSlot((Slot) 
date.child())) {
-            return result;
-        }
-        Slot partitionSlot = (Slot) date.child();
-        PartitionSlotType partitionSlotType = 
getPartitionSlotType(partitionSlot).get();
-        if (partitionSlotType != PartitionSlotType.RANGE || 
partitionSlotContainsNull.get(partitionSlot)) {
-            return result;
-        }
-        DataType childType = date.child().getDataType();
-        if (!childType.isDateTimeType() && !childType.isDateTimeV2Type()) {
-            return result;
-        }
-        ColumnRange dateTimeRange = 
result.childrenResult.get(0).columnRanges.get((Slot) date.child());
-        if (dateTimeRange.isEmptyRange()) {
-            return result;
+        Expression dateChild = date.child(0);
+        if (partitionSlotContainsNull.containsKey(dateChild)) {
+            partitionSlotContainsNull.put(date, true);
         }
-
-        Range<ColumnBound> span = dateTimeRange.span();
-        Literal lower = span.lowerEndpoint().getValue();
-        Literal upper = span.upperEndpoint().getValue();
-
-        Expression lowerDate = FoldConstantRuleOnFE.evaluate(new Date(lower), 
expressionRewriteContext);
-        Expression upperDate = FoldConstantRuleOnFE.evaluate(new Date(upper), 
expressionRewriteContext);
-
-        if (lowerDate instanceof Literal && upperDate instanceof Literal && 
lowerDate.equals(upperDate)) {
-            return new EvaluateRangeResult(lowerDate, result.columnRanges, 
result.childrenResult);
-        }
-
-        return result;
+        return computeMonotonicFunctionRange(result);
     }
 
     private boolean isPartitionSlot(Slot slot) {
@@ -859,7 +834,8 @@ public class OneRangePartitionEvaluator
         }
         Range<ColumnBound> span = childRange.span();
         Literal lower = span.hasLowerBound() ? span.lowerEndpoint().getValue() 
: null;
-        Literal upper = span.hasUpperBound() ? span.upperEndpoint().getValue() 
: null;
+        Literal upper = span.hasUpperBound() && 
!(span.upperEndpoint().getValue() instanceof MaxLiteral)
+                ? span.upperEndpoint().getValue() : null;
         Expression lowerValue = lower != null ? 
FoldConstantRuleOnFE.evaluate(func.withConstantArgs(lower),
                 expressionRewriteContext) : null;
         Expression upperValue = upper != null ? 
FoldConstantRuleOnFE.evaluate(func.withConstantArgs(upper),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Date.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Date.java
index 01fa2acd981..3f0cdcc6b8a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Date.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Date.java
@@ -21,6 +21,8 @@ import org.apache.doris.catalog.FunctionSignature;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
 import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.Monotonic;
+import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.DateTimeType;
@@ -37,7 +39,7 @@ import java.util.List;
  * ScalarFunction 'date'. This class is generated by GenerateFunction.
  */
 public class Date extends ScalarFunction
-        implements UnaryExpression, ExplicitlyCastableSignature, 
AlwaysNullable {
+        implements UnaryExpression, ExplicitlyCastableSignature, 
AlwaysNullable, Monotonic {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             
FunctionSignature.ret(DateV2Type.INSTANCE).args(DateTimeV2Type.SYSTEM_DEFAULT),
@@ -69,4 +71,19 @@ public class Date extends ScalarFunction
     public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
         return visitor.visitDate(this, context);
     }
+
+    @Override
+    public boolean isPositive() {
+        return true;
+    }
+
+    @Override
+    public int getMonotonicFunctionChildIndex() {
+        return 0;
+    }
+
+    @Override
+    public Expression withConstantArgs(Literal literal) {
+        return new Date(literal);
+    }
 }
diff --git 
a/regression-test/suites/nereids_rules_p0/partition_prune/test_date_function_prune_mono.groovy
 
b/regression-test/suites/nereids_rules_p0/partition_prune/test_date_function_prune_mono.groovy
new file mode 100644
index 00000000000..7af54e3162b
--- /dev/null
+++ 
b/regression-test/suites/nereids_rules_p0/partition_prune/test_date_function_prune_mono.groovy
@@ -0,0 +1,310 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_date_prune_mono") {
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+    sql "drop table if exists mal_test_partition_range5_date_mono"
+    sql"""
+        CREATE TABLE `mal_test_partition_range5_date_mono` (
+          `a` INT NULL,
+          `b` datetime not NULL,
+          `c` INT NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`a`, `b`, `c`)
+        PARTITION BY RANGE(`b`)
+        (PARTITION p1 VALUES [("2020-01-05 10:00:00"), ("2020-01-09 
10:00:00")),
+        PARTITION p2 VALUES [("2020-01-09 10:00:00"), ("2020-01-13 10:00:00")),
+        PARTITION p3 VALUES [("2020-01-13 10:00:00"), ("2020-01-19 10:00:00")))
+        DISTRIBUTED BY HASH(`a`) BUCKETS 10
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );"""
+    sql """insert into mal_test_partition_range5_date_mono 
values(1,"2020-01-09 09:00:00",4),(1,"2020-01-09 11:00:00",4),
+        (1,"2020-01-13 11:00:00",4),(1,"2020-01-13 09:00:00",4)"""
+    // > >= < <= = <=>
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
date(b)<="2020-01-08" """
+        contains("partitions=1/3 (p1)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
date(b)<"2020-01-08" """
+        contains("partitions=1/3 (p1)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
"2020-01-08">=date(b)"""
+        contains("partitions=1/3 (p1)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
"2020-01-08" > date(b)"""
+        contains("partitions=1/3 (p1)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
date(b)="2020-01-08" """
+        contains("partitions=1/3 (p1)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
"2020-01-08" = date(b)"""
+        contains("partitions=1/3 (p1)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
date(b)>="2020-01-14" """
+        contains("partitions=1/3 (p3)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
date(b)>"2020-01-14" """
+        contains("partitions=1/3 (p3)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
"2020-01-14"<=date(b)"""
+        contains("partitions=1/3 (p3)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
"2020-01-14" < date(b)"""
+        contains("partitions=1/3 (p3)")
+    }
+
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where  
date(b) in ("2020-01-13")"""
+        contains("partitions=2/3 (p2,p3)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
"2020-01-14" <=> date(b)"""
+        contains("partitions=1/3 (p3)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where  
date(b) <=>"2020-01-14" """
+        contains("partitions=1/3 (p3)")
+    }
+
+    // and or
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
date(b)>"2020-01-09"  and date(b) <"2020-01-13" """
+        contains("partitions=1/3 (p2)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
date(b)>"2020-01-09"  or date(b) <"2020-01-13" """
+        contains("partitions=3/3 (p1,p2,p3)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where 
date(b)>"2020-01-14"  or date(b) <"2020-01-06" """
+        contains("partitions=2/3 (p1,p3)")
+    }
+
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where date(b) 
between "2020-01-09"  and "2020-01-13" """
+        contains("partitions=3/3 (p1,p2,p3)")
+
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where date(b) 
between "2020-01-10"  and "2020-01-14" """
+        contains("partitions=2/3 (p2,p3)")
+
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where date(b) 
between "2020-01-10"  and "2020-01-12" """
+        contains("partitions=1/3 (p2)")
+    }
+
+    // test not
+    for (int i = 0; i < 2; i++) {
+        if (i == 0) {
+            // forbid rewrite not a>1 to a<=1
+            sql "set disable_nereids_rules = 'REWRITE_FILTER_EXPRESSION'"
+        } else {
+            sql "set disable_nereids_rules = ''"
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
date(b)<="2020-01-14" """
+            contains("partitions=1/3 (p3)")
+        }
+        explain {
+            sql """ select * from mal_test_partition_range5_date_mono where 
not date(b)<"2020-01-14" """
+            contains("partitions=1/3 (p3)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
"2020-01-14">=date(b)"""
+            contains("partitions=1/3 (p3)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
"2020-01-14" > date(b)"""
+            contains("partitions=1/3 (p3)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
date(b)="2020-01-08" """
+            contains("partitions=3/3 (p1,p2,p3)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
"2020-01-08" = date(b)"""
+            contains("partitions=3/3 (p1,p2,p3)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
date(b)>="2020-01-9" """
+            contains("partitions=1/3 (p1)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
date(b)>"2020-01-9" """
+            contains("partitions=2/3 (p1,p2)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
"2020-01-9"<=date(b)"""
+            contains("partitions=1/3 (p1)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
"2020-01-9" < date(b)"""
+            contains("partitions=2/3 (p1,p2)")
+        }
+        explain {
+            sql """ select * from mal_test_partition_range5_date_mono where  
not date(b) in ("2020-01-13 00:00:00")"""
+            contains("partitions=3/3 (p1,p2,p3)")
+        }
+        explain {
+            sql """ select * from mal_test_partition_range5_date_mono where 
not "2020-01-14" <=> date(b)"""
+            contains("partitions=3/3 (p1,p2,p3)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
date(b) <=>"2020-01-14" """
+            contains("partitions=3/3 (p1,p2,p3)")
+        }
+        explain {
+            sql """ select * from mal_test_partition_range5_date_mono where 
not  (date(b)>"2020-01-09"  and date(b) <"2020-01-13") """
+            contains("partitions=3/3 (p1,p2,p3)")
+        }
+        explain {
+            sql """ select * from mal_test_partition_range5_date_mono where 
not (date(b)>="2020-01-13"  or date(b) <="2020-01-9") """
+            contains("partitions=1/3 (p2)")
+        }
+        explain {
+            sql """ select * from mal_test_partition_range5_date_mono where 
not date(b)<="2020-01-14"  or date(b) <"2020-01-06" """
+            contains("partitions=2/3 (p1,p3)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
date(b) between "2020-01-09"  and "2020-01-13" """
+            contains("partitions=2/3 (p1,p3)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where not 
date(b) between "2020-01-10"  and "2020-01-14" """
+            contains("partitions=3/3 (p1,p2,p3)")
+        }
+        explain {
+            sql """select * from mal_test_partition_range5_date_mono where 
date(b) not between "2020-01-4"  and "2020-01-15" """
+            contains("partitions=1/3 (p3)")
+        }
+    }
+
+    // trunc(b) and b
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where (not 
date(b)<="2020-01-14") or b<"2020-01-9" """
+        contains("partitions=2/3 (p1,p3)")
+    }
+    explain {
+        sql """select * from mal_test_partition_range5_date_mono where  
date(b)<"2020-01-13" and  b>"2020-01-10" """
+        contains("partitions=1/3 (p2)")
+    }
+    explain {
+        sql """ select * from mal_test_partition_range5_date_mono where  
date(b)<"2020-01-13" and  b>"2020-01-9" """
+        contains("partitions=2/3 (p1,p2)")
+    }
+    explain {
+        sql """ select * from mal_test_partition_range5_date_mono where ( 
date(b)<="2020-01-14" and b >"2020-01-12") or b<"2020-01-9" """
+        contains("partitions=3/3 (p1,p2,p3)")
+    }
+    explain {
+        sql """ select * from mal_test_partition_range5_date_mono where ( 
date(b)<="2020-01-14" and b >"2020-01-19") or b<"2020-01-9" """
+        contains("partitions=1/3 (p1)")
+    }
+    explain {
+        sql """ select * from mal_test_partition_range5_date_mono where ( 
date(b)<="2020-01-14" and b >"2020-01-20") or b<"2020-01-9" """
+        contains("partitions=1/3 (p1)")
+    }
+    explain {
+        sql """ select * from mal_test_partition_range5_date_mono where ( not 
date(b)<="2020-01-14" or b >"2020-01-20") or b<"2020-01-9" """
+        contains("partitions=2/3 (p1,p3)")
+    }
+    explain {
+        sql """ select * from mal_test_partition_range5_date_mono where (  
date(b) between "2020-01-14" and "2020-01-20") or b<"2020-01-9" """
+        contains("partitions=2/3 (p1,p3)")
+    }
+
+    // is null, can support but now not
+    sql "drop table if exists null_range_date_mono"
+    sql """
+        create table null_range_date_mono(
+        k0 datetime null
+        )
+        partition by range (k0)
+        (
+        PARTITION p10 values less than ('2022-01-01 10:00:00'),
+        PARTITION p100 values less than ('2022-01-04 10:00:00'),
+        PARTITION pMAX values less than (maxvalue)
+        )
+        DISTRIBUTED BY HASH(`k0`) BUCKETS 1 properties("replication_num"="1")
+    """
+    sql "insert into null_range_date_mono values('2022-01-03 
10:00:00'),('2019-01-01 10:00:00'),('2022-01-02 10:00:00'),('2024-01-01 
10:00:00'),(null);"
+    explain {
+        sql "select * from null_range_date_mono where date(k0) is null"
+        contains("partitions=3/3 (p10,p100,pMAX)")
+    }
+    // test infinite range
+    explain {
+        sql "select * from null_range_date_mono where date(k0) <'2022-1-3'"
+        contains("partitions=2/3 (p10,p100)")
+    }
+    explain {
+        sql "select * from null_range_date_mono where date(k0) >'2022-1-3'"
+        contains("partitions=2/3 (p100,pMAX)")
+    }
+
+    sql "drop table if exists mal_test_partition_range2_two_date_int_date_mono"
+    sql """CREATE TABLE `mal_test_partition_range2_two_date_int_date_mono` (
+          `dt` DATETIME NULL,
+          `id` INT NULL,
+          `c` INT NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`dt`, `id`, `c`)
+        PARTITION BY RANGE(`dt`, `id`)
+        (PARTITION p201701_1000 VALUES [('0000-01-01', "-2147483648"), 
('2017-02-01', "1000")),
+        PARTITION p201702_2000 VALUES [('2017-02-01', "1000"), ('2017-03-01', 
"2000")),
+        PARTITION p201703_all VALUES [('2017-03-01', "2000"), ('2017-04-01', 
"-2147483648")))
+        DISTRIBUTED BY HASH(`dt`) BUCKETS 10
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );"""
+    sql """insert into mal_test_partition_range2_two_date_int_date_mono 
values('2017-01-03 10:00:00', 3,23),('2017-02-04 10:00:00', 333,4),('2017-03-05 
10:00:00', 1222,6);"""
+    explain {
+        sql """select * from mal_test_partition_range2_two_date_int_date_mono 
where date(dt) = '2017-2-1 00:00:00' and id>0 ;"""
+        contains ("partitions=2/3 (p201701_1000,p201702_2000)")
+    }
+    explain {
+        sql "select * from mal_test_partition_range2_two_date_int_date_mono 
where date(dt) > '2017-2-1  00:00:00' and id>100;"
+        contains("partitions=2/3 (p201702_2000,p201703_all)")
+    }
+
+    explain {
+        sql "select * from mal_test_partition_range2_two_date_int_date_mono 
where date(date_trunc(dt,'hour')) > '2017-2-1  00:00:00' and id>100;"
+        contains("partitions=2/3 (p201702_2000,p201703_all)")
+    }
+    // test nest function
+    explain {
+        sql "select * from mal_test_partition_range2_two_date_int_date_mono 
where date(date_trunc(dt,'minute')) > '2017-2-1  00:00:00' and id>100;"
+        contains("partitions=2/3 (p201702_2000,p201703_all)")
+    }
+    explain {
+        sql "select * from mal_test_partition_range2_two_date_int_date_mono 
where date_trunc(date(dt),'minute') > '2017-2-1  00:00:00' and id>100;"
+        contains("partitions=2/3 (p201702_2000,p201703_all)")
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to