This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new e7395198d48 [fix](estimate) func call with not filter will estimate 
some statisti… (#42302)
e7395198d48 is described below

commit e7395198d48d4430c8c8f9a3ba8ecc2bf28b3546
Author: amory <wangqian...@selectdb.com>
AuthorDate: Wed Oct 23 14:33:41 2024 +0800

    [fix](estimate) func call with not filter will estimate some statisti… 
(#42302)
    
    …cs (#41989)
    
    Before this PR, applying `!` or `NOT` to a function call inside an OR
    condition on a table that has already been analyzed fails with:
    ```
    SELECT
      count(1)
    FROM
      table_30_un_pa_ke_pr_di4
    where
      col_int_undef_signed_not_null < -128
      or not array_contains(col_array_bigint__undef_signed, 
col_int_undef_signed_not_null);
    
    ERROR 1105 (HY000): errCode = 2, detailMessage = Not-predicate meet 
unexpected child:
      array_contains(col_array_bigint__undef_signed, 
cast(col_int_undef_signed_not_null as BIGINT))
    ```
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 be/src/vec/functions/array/function_array_index.h  |   8 ++
 .../vec/functions/array/function_arrays_overlap.h  |   8 ++
 .../doris/nereids/stats/FilterEstimation.java      |   4 +-
 .../data/inverted_index_p0/tai_estimate.csv        | 149 +++++++++++++++++++++
 .../test_array_contains_estimate.out               |   4 +
 .../test_array_contains_estimate.groovy            |  94 +++++++++++++
 6 files changed, 266 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/functions/array/function_array_index.h 
b/be/src/vec/functions/array/function_array_index.h
index 7e709bfd817..a2e26c2d212 100644
--- a/be/src/vec/functions/array/function_array_index.h
+++ b/be/src/vec/functions/array/function_array_index.h
@@ -169,6 +169,14 @@ public:
 
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         size_t result, size_t input_rows_count) const override 
{
+        DBUG_EXECUTE_IF("array_func.array_contains", {
+            auto req_id = 
DebugPoints::instance()->get_debug_param_or_default<int32_t>(
+                    "array_func.array_contains", "req_id", 0);
+            return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                    "{} has already execute inverted index req_id {} , should 
not execute expr "
+                    "with rows: {}",
+                    get_name(), req_id, input_rows_count);
+        });
         return _execute_dispatch(block, arguments, result, input_rows_count);
     }
 
diff --git a/be/src/vec/functions/array/function_arrays_overlap.h 
b/be/src/vec/functions/array/function_arrays_overlap.h
index fac5264d027..a4152f15039 100644
--- a/be/src/vec/functions/array/function_arrays_overlap.h
+++ b/be/src/vec/functions/array/function_arrays_overlap.h
@@ -207,6 +207,14 @@ public:
 
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         size_t result, size_t input_rows_count) const override 
{
+        DBUG_EXECUTE_IF("array_func.arrays_overlap", {
+            auto req_id = 
DebugPoints::instance()->get_debug_param_or_default<int32_t>(
+                    "array_func.arrays_overlap", "req_id", 0);
+            return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                    "{} has already execute inverted index req_id {} , should 
not execute expr "
+                    "with rows: {}",
+                    get_name(), req_id, input_rows_count);
+        });
         auto left_column =
                 
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
         auto right_column =
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index df0edf8b159..c942b18ca98 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -509,13 +509,15 @@ public class FilterEstimation extends 
ExpressionVisitor<Statistics, EstimationCo
                 // 2. not A in (...)
                 // 3. not A is null
                 // 4. not A like XXX
+                // 5. not array_contains([xx, xx], xx)
                 colBuilder.setNumNulls(0);
                 Preconditions.checkArgument(
                         child instanceof EqualPredicate
                                 || child instanceof InPredicate
                                 || child instanceof IsNull
                                 || child instanceof Like
-                                || child instanceof Match,
+                                || child instanceof Match
+                                || child instanceof Function,
                         "Not-predicate meet unexpected child: %s", 
child.toSql());
                 if (child instanceof Like) {
                     rowCount = context.statistics.getRowCount() - 
childStats.getRowCount();
diff --git a/regression-test/data/inverted_index_p0/tai_estimate.csv 
b/regression-test/data/inverted_index_p0/tai_estimate.csv
new file mode 100644
index 00000000000..394e8b36a69
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/tai_estimate.csv
@@ -0,0 +1,149 @@
+2024-10-01|"123456"|['John Doe','Jane Smith']
+2024-10-02|"234567"|['Alice Johnson','Bob Lee']
+2024-10-03|"345678"|['Chris White','Dana Black']
+2024-10-04|"456789"|['Eve Green','Frank Blue']
+2024-10-05|"567890"|['Grace Yellow','Hank Red']
+2024-10-06|"678901"|['Ivy Gray','Jack Silver']
+2024-10-07|"789012"|['Karen Brown','Larry Orange']
+2024-10-08|"890123"|['Monica Purple','Nate Gold']
+2024-10-09|"901234"|['Olivia Pink','Peter Bronze']
+2024-10-10|"012345"|['Quinn Black','Rachel Cyan']
+2024-10-11|"112345"|['Steve Magenta','Tom Indigo']
+2024-10-12|"212345"|['Ursula Violet','Victor Maroon']
+2024-10-13|"312345"|['Wendy Lime','Xander Olive']
+2024-10-14|"412345"|['Yara Teal','Zach Plum']
+2024-10-15|"512345"|['Ada Amber','Ben Copper']
+2024-10-16|"612345"|['Cathy Crimson','Dan Saffron']
+2024-10-17|"712345"|['Eliot Coral','Fiona Burgundy']
+2024-10-18|"812345"|['George Navy','Holly Peach']
+2024-10-19|"912345"|['Irene Lavender','Jake Chartreuse']
+2024-10-20|"013456"|['Karl Slate','Laura Fuchsia']
+2024-10-21|"113456"|['Matt Cyan','Nancy Azure']
+2024-10-22|"213456"|['Oscar Beige','Pam Lilac']
+2024-10-23|"313456"|['Quincy Rose','Rita Steel']
+2024-10-24|"413456"|['Sam Mint','Tina Lemon']
+2024-10-25|"513456"|['Ugo Brass','Vera Aquamarine']
+2024-10-26|"613456"|['Walt Jade','Xena Amethyst']
+2024-10-27|"713456"|['Yuri Copper','Zoe Emerald']
+2024-10-28|"813456"|['Alan Graphite','Bea Indigo']
+2024-10-29|"913456"|['Carl Magenta','Dina Quartz']
+2024-10-30|"023456"|['Evan Ruby','Faith Scarlet']
+2024-10-31|"123456"|['Greg Topaz','Helen Ivory']
+2024-11-01|"223456"|['Ian Tan','Jane Garnet']
+2024-11-02|"323456"|['Kyle Pearl','Lily Denim']
+2024-11-03|"423456"|['Mark Bronze','Nina Bronze']
+2024-11-04|"523456"|['Oscar Citrine','Paula Peridot']
+2024-11-05|"623456"|['Quinn Jasper','Rita Amber']
+2024-11-06|"723456"|['Steve Zircon','Tina Opal']
+2024-11-07|"823456"|['Uma Amber','Vera Turquoise']
+2024-11-08|"923456"|['Wendy Crystal','Xander Ivory']
+2024-11-09|"033456"|['Yara Onyx','Zach Emerald']
+2024-11-10|"133456"|['Alan Ruby','Bea Gold']
+2024-11-11|"233456"|['Cathy Diamond','Dan Garnet']
+2024-11-12|"333456"|['Eliot Sapphire','Fiona Sapphire']
+2024-11-13|"433456"|['George Jade','Holly Pearl']
+2024-11-14|"533456"|['Ivy Topaz','Jake Bronze']
+2024-11-15|"633456"|['Karl Pearl','Laura Ivory']
+2024-11-16|"733456"|['Matt Gold','Nancy Silver']
+2024-11-17|"833456"|['Oscar Ivory','Pam Onyx']
+2024-11-18|"933456"|['Amory Wang','Being Committer']
+2024-11-19|"043456"|['Sam Garnet','Tina Crystal']
+2024-11-20|"143456"|['Ugo Jasper','Vera Sapphire']
+2024-11-21|"243456"|['Walt Sapphire','Xena Opal']
+2024-11-22|"343456"|['Yuri Emerald','Zoe Gold']
+2024-11-23|"443456"|['Alan Jade','Bea Pearl']
+2024-11-24|"543456"|['Cathy Opal','Dan Ivory']
+2024-11-25|"643456"|['Eliot Bronze','Fiona Ruby']
+2024-11-26|"743456"|['George Ivory','Holly Jade']
+2024-11-27|"843456"|['Irene Sapphire','Jake Bronze']
+2024-11-28|"943456"|['Karl Emerald','Laura Topaz']
+2024-11-29|"053456"|['Matt Ivory','Nancy Ruby']
+2024-11-30|"153456"|['Oscar Bronze','Pam Sapphire']
+2024-12-01|"253456"|['Quinn Pearl','Rita Emerald']
+2024-12-02|"353456"|['Sam Ruby','Tina Ivory']
+2024-12-03|"453456"|['Ugo Onyx','Vera Pearl']
+2024-12-04|"553456"|['Walt Topaz','Xena Gold']
+2024-12-05|"653456"|['Yuri Bronze','Zoe Ruby']
+2024-12-06|"753456"|['Alan Sapphire','Bea Garnet']
+2024-12-07|"853456"|['Cathy Emerald','Dan Ruby']
+2024-12-08|"953456"|['Eliot Sapphire','Fiona Pearl']
+2024-12-09|"063456"|['George Gold','Holly Sapphire']
+2024-12-10|"163456"|['Irene Bronze','Jake Emerald']
+2024-12-11|"263456"|['Karl Ruby','Laura Onyx']
+2024-12-12|"363456"|['Matt Pearl','Nancy Ivory']
+2024-12-13|"463456"|['Oscar Topaz','Pam Gold']
+2024-12-14|"563456"|['Quinn Ruby','Rita Sapphire']
+2024-12-15|"663456"|['Sam Garnet','Tina Pearl']
+2024-12-16|"763456"|['Ugo Jade','Vera Bronze']
+2024-12-17|"863456"|['Walt Ruby','Xena Emerald']
+2024-12-18|"963456"|['Yuri Pearl','Zoe Onyx']
+2024-12-19|"073456"|['Adam Jade','Bella Onyx']
+2024-12-20|"173456"|['Cody Ruby','Diana Pearl']
+2024-12-21|"273456"|['Eliza Bronze','Frank Sapphire']
+2024-12-22|"373456"|['Gina Emerald','Hank Gold']
+2024-12-23|"473456"|['Isaac Pearl','Julia Ruby']
+2024-12-24|"573456"|['Kyle Onyx','Luna Ivory']
+2024-12-25|"673456"|['Mona Ruby','Nick Emerald']
+2024-12-26|"773456"|['Olga Sapphire','Paul Topaz']
+2024-12-27|"873456"|['Quincy Ivory','Rachel Garnet']
+2024-12-28|"973456"|['Steve Onyx','Tina Sapphire']
+2024-12-29|"083456"|['Uma Ruby','Victor Pearl']
+2024-12-30|"183456"|['Wendy Topaz','Xander Bronze']
+2024-12-31|"283456"|['Yara Emerald','Zane Ruby']
+2025-01-01|"383456"|['Alan Sapphire','Bea Garnet']
+2025-01-02|"483456"|['Cathy Ruby','Dan Onyx']
+2025-01-03|"583456"|['Eliot Pearl','Fiona Topaz']
+2025-01-04|"683456"|['George Sapphire','Holly Emerald']
+2025-01-05|"783456"|['Isaac Bronze','Julia Topaz']
+2025-01-06|"883456"|['Karl Onyx','Laura Sapphire']
+2025-01-07|"983456"|['Matt Ruby','Nancy Garnet']
+2025-01-08|"093456"|['Oscar Emerald','Pam Onyx']
+2025-01-09|"193456"|['Quinn Ruby','Rita Pearl']
+2025-01-10|"293456"|['Sam Sapphire','Tina Garnet']
+2025-01-11|"393456"|['Ugo Onyx','Vera Ruby']
+2025-01-12|"493456"|['Walt Topaz','Xena Sapphire']
+2025-01-13|"593456"|['Yuri Garnet','Zoe Onyx']
+2025-01-14|"693456"|['Adam Ruby','Bella Pearl']
+2025-01-15|"793456"|['Cody Sapphire','Diana Emerald']
+2025-01-16|"893456"|['Eliza Ruby','Frank Pearl']
+2025-01-17|"993456"|['Gina Onyx','Hank Garnet']
+2025-01-18|"103456"|['Isaac Sapphire','Julia Ruby']
+2025-01-19|"203456"|['Kyle Topaz','Luna Emerald']
+2025-01-20|"303456"|['Mona Ruby','Nick Pearl']
+2025-01-21|"403456"|['Olga Garnet','Paul Onyx']
+2025-01-22|"503456"|['Quincy Emerald','Rachel Sapphire']
+2025-01-23|"603456"|['Steve Ruby','Tina Pearl']
+2025-01-24|"703456"|['Uma Garnet','Victor Onyx']
+2025-01-25|"803456"|['Wendy Ruby','Xander Pearl']
+2025-01-26|"903456"|['Yara Sapphire','Zane Emerald']
+2025-01-27|"113456"|['Alan Onyx','Bea Garnet']
+2025-01-28|"213456"|['Cathy Sapphire','Dan Ruby']
+2025-01-29|"313456"|['Eliot Emerald','Fiona Pearl']
+2025-01-30|"413456"|['George Garnet','Holly Onyx']
+2025-01-31|"513456"|['Isaac Ruby','Julia Sapphire']
+2025-02-01|"613456"|['Karl Pearl','Laura Emerald']
+2025-02-02|"713456"|['Matt Onyx','Nancy Ruby']
+2025-02-03|"813456"|['Oscar Sapphire','Pam Pearl']
+2025-02-04|"913456"|['Quinn Garnet','Rita Ruby']
+2025-02-05|"123456"|['Sam Onyx','Tina Emerald']
+2025-02-06|"223456"|['Ugo Sapphire','Vera Pearl']
+2025-02-07|"323456"|['Walt Ruby','Xena Onyx']
+2025-02-08|"423456"|['Yuri Emerald','Zoe Sapphire']
+2025-02-09|"523456"|['Adam Onyx','Bella Garnet']
+2025-02-10|"623456"|['Cody Ruby','Diana Sapphire']
+2025-02-11|"723456"|['Eliza Emerald','Frank Pearl']
+2025-02-12|"823456"|['Gina Ruby','Hank Sapphire']
+2025-02-13|"923456"|['Isaac Onyx','Julia Garnet']
+2025-02-14|"133456"|['Kyle Ruby','Luna Pearl']
+2025-02-15|"233456"|['Mona Emerald','Nick Sapphire']
+2025-02-16|"333456"|['Olga Ruby','Paul Garnet']
+2025-02-17|"433456"|['Quincy Pearl','Rachel Sapphire']
+2025-02-18|"533456"|['Steve Ruby','Tina Garnet']
+2025-02-19|"633456"|['Uma Onyx','Victor Pearl']
+2025-02-20|"733456"|['Wendy Sapphire','Xander Ruby']
+2025-02-21|"833456"|['Yara Garnet','Zane Onyx']
+2025-02-22|"933456"|['Alan Ruby','Bea Pearl']
+2025-02-23|"143456"|['Cathy Sapphire','Dan Onyx']
+2025-02-24|"243456"|['Eliot Ruby','Fiona Emerald']
+2025-02-25|"343456"|['George Pearl','Holly Sapphire']
+2025-02-26|"443456"|['Isaac Onyx','Julia Ruby']
\ No newline at end of file
diff --git 
a/regression-test/data/inverted_index_p0/test_array_contains_estimate.out 
b/regression-test/data/inverted_index_p0/test_array_contains_estimate.out
new file mode 100644
index 00000000000..8382a2f834f
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_array_contains_estimate.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+148
+
diff --git 
a/regression-test/suites/inverted_index_p0/test_array_contains_estimate.groovy 
b/regression-test/suites/inverted_index_p0/test_array_contains_estimate.groovy
new file mode 100644
index 00000000000..09f2a7e891c
--- /dev/null
+++ 
b/regression-test/suites/inverted_index_p0/test_array_contains_estimate.groovy
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_contains_estimate", "nonConcurrent"){
+    // prepare test table
+    def indexTblName = "tai_estimate"
+    def dataFile = "tai_estimate.csv"
+
+    sql """ set enable_common_expr_pushdown = true; """
+    sql """ set enable_profile = true;"""
+    sql """ set enable_inverted_index_query=true; """
+    sql """ set inverted_index_skip_threshold = 0; """ // set skip threshold 
to 0
+
+    sql "DROP TABLE IF EXISTS ${indexTblName}"
+    // create 1 replica table
+    sql """
+       CREATE TABLE IF NOT EXISTS `${indexTblName}` (
+      `apply_date` date NULL COMMENT '',
+      `id` varchar(60) NOT NULL COMMENT '',
+      `inventors` array<text> NULL COMMENT '',
+      INDEX index_inverted_inventors(inventors) USING INVERTED  COMMENT ''
+    ) ENGINE=OLAP
+    DUPLICATE KEY(`apply_date`, `id`)
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES (
+    "replication_allocation" = "tag.location.default: 1",
+    "is_being_synced" = "false",
+    "storage_format" = "V2",
+    "light_schema_change" = "true",
+    "disable_auto_compaction" = "false",
+    "enable_single_replica_compaction" = "false"
+    );
+    """
+
+    streamLoad {
+        table indexTblName
+
+        file dataFile // import csv file
+        time 10000 // limit inflight 10s
+        set 'column_separator', '|'
+        set 'strict_mode', 'true'
+
+        // if declared a check callback, the default check condition will 
ignore.
+        // So you must check all condition
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals(149, json.NumberTotalRows)
+            assertEquals(149, json.NumberLoadedRows)
+            assertTrue(json.LoadBytes > 0)
+        }
+    }
+
+    // test array_contains estimate
+    def create_sql = {
+        List<String> list = new ArrayList<>()
+        list.add("select count() from ${indexTblName} where  apply_date = 
"2024-11-18" or !array_contains(inventors, 'Amory Wang')")
+        return list;
+    }
+
+    def checkpoints_name = "array_func.array_contains"
+    def execute_sql = { sqlList ->
+        def i = 0
+        for (sqlStr in sqlList) {
+            try {
+                log.info("execute sql: i")
+                GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, 
[req_id: i])
+                order_qt_sql """ ${sqlStr} """
+            } finally {
+                GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+            }
+            ++i
+        }
+    }
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to