This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 6de8e4be862 [fix] (nereids) fix Match Expression in filter estimation 
(#39050) (#39216)
6de8e4be862 is described below

commit 6de8e4be8621f06ada8a3b560322cca363f431ba
Author: Sun Chenyang <csun5...@gmail.com>
AuthorDate: Tue Aug 13 10:55:41 2024 +0800

    [fix] (nereids) fix Match Expression in filter estimation (#39050) (#39216)
    
    ## Proposed changes
    
    pick from master #39050
---
 .../doris/nereids/stats/FilterEstimation.java      |  7 ++-
 .../data/inverted_index_p0/test_or_not_match.out   |  5 ++
 .../inverted_index_p0/test_or_not_match.groovy     | 69 ++++++++++++++++++++++
 3 files changed, 80 insertions(+), 1 deletion(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index 0ce10ec0c3c..b4b4fa5e3f8 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -34,6 +34,7 @@ import org.apache.doris.nereids.trees.expressions.IsNull;
 import org.apache.doris.nereids.trees.expressions.LessThan;
 import org.apache.doris.nereids.trees.expressions.LessThanEqual;
 import org.apache.doris.nereids.trees.expressions.Like;
+import org.apache.doris.nereids.trees.expressions.Match;
 import org.apache.doris.nereids.trees.expressions.Not;
 import org.apache.doris.nereids.trees.expressions.NullSafeEqual;
 import org.apache.doris.nereids.trees.expressions.Or;
@@ -480,7 +481,8 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
                         child instanceof EqualPredicate
                                 || child instanceof InPredicate
                                 || child instanceof IsNull
-                                || child instanceof Like,
+                                || child instanceof Like
+                                || child instanceof Match,
                         "Not-predicate meet unexpected child: %s", 
child.toSql());
                 if (child instanceof Like) {
                     rowCount = context.statistics.getRowCount() - 
childStats.getRowCount();
@@ -503,6 +505,9 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
                             .setMinExpr(originColStats.minExpr)
                             .setMaxValue(originColStats.maxValue)
                             .setMaxExpr(originColStats.maxExpr);
+                } else if (child instanceof Match) {
+                    rowCount = context.statistics.getRowCount() - 
childStats.getRowCount();
+                    colBuilder.setNdv(Math.max(1.0, originColStats.ndv - 
childColStats.ndv));
                 }
                 if (not.child().getInputSlots().size() == 1 && !(child 
instanceof IsNull)) {
                     // only consider the single column numNull, otherwise, 
ignore
diff --git a/regression-test/data/inverted_index_p0/test_or_not_match.out 
b/regression-test/data/inverted_index_p0/test_or_not_match.out
new file mode 100644
index 00000000000..22dde7a8bf7
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_or_not_match.out
@@ -0,0 +1,5 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+GET / HTTP/1.0
+GET / HTTP/1.0
+
diff --git a/regression-test/suites/inverted_index_p0/test_or_not_match.groovy 
b/regression-test/suites/inverted_index_p0/test_or_not_match.groovy
new file mode 100644
index 00000000000..95af26480c9
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_or_not_match.groovy
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_or_not_match", "p0") {
+    def tableName = "test_or_not_match"
+    sql "DROP TABLE IF EXISTS ${tableName}"
+    sql """
+      CREATE TABLE ${tableName} (
+      `@timestamp` int(11) NULL COMMENT "",
+      `clientip` varchar(20) NULL COMMENT "",
+      `request` text NULL COMMENT "",
+      `status` int(11) NULL COMMENT "",
+      `size` int(11) NULL COMMENT "",
+      INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = 
"english", "support_phrase" = "true") COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+      "replication_allocation" = "tag.location.default: 1"
+      );
+    """
+    // load the json data
+    streamLoad {
+        table "${tableName}"
+        
+        // set http request header params
+        set 'read_json_by_line', 'true'
+        set 'format', 'json'
+        file 'documents-1000.json' // import json file
+        time 10000 // limit inflight 10s
+
+        // If a check callback is declared, the default check conditions are ignored,
+        // so you must check every condition yourself.
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals("success", json.Status.toLowerCase())
+            assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+        }
+    }
+    for (int i = 0; i < 10; i++) {
+        sql "select request from ${tableName} where request like '1.0' or not 
request MATCH 'GETA';"
+    }
+
+    sql "set enable_nereids_planner = true"
+    sql "set enable_fallback_to_original_planner = false"
+
+    qt_sql "select request from ${tableName} where request like '1.0' or not 
request MATCH 'GETA' order by request limit 2;"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to