This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new e7395198d48 [fix](estimate) func call with not filter will estimate some statisti… (#42302) e7395198d48 is described below commit e7395198d48d4430c8c8f9a3ba8ecc2bf28b3546 Author: amory <wangqian...@selectdb.com> AuthorDate: Wed Oct 23 14:33:41 2024 +0800 [fix](estimate) func call with not filter will estimate some statisti… (#42302) …cs (#41989) Before this PR, using `!` or `NOT` on a function call inside an OR condition failed once the table had been analyzed: ``` SELECT count(1) FROM table_30_un_pa_ke_pr_di4 where col_int_undef_signed_not_null < -128 or not array_contains(col_array_bigint__undef_signed, col_int_undef_signed_not_null); ERROR 1105 (HY000): errCode = 2, detailMessage = Not-predicate meet unexpected child: array_contains(col_array_bigint__undef_signed, cast(col_int_undef_signed_not_null as BIGINT)) ``` ## Proposed changes Issue Number: close #xxx <!--Describe your changes.--> --- be/src/vec/functions/array/function_array_index.h | 8 ++ .../vec/functions/array/function_arrays_overlap.h | 8 ++ .../doris/nereids/stats/FilterEstimation.java | 4 +- .../data/inverted_index_p0/tai_estimate.csv | 149 +++++++++++++++++++++ .../test_array_contains_estimate.out | 4 + .../test_array_contains_estimate.groovy | 94 +++++++++++++ 6 files changed, 266 insertions(+), 1 deletion(-) diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index 7e709bfd817..a2e26c2d212 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -169,6 +169,14 @@ public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { + DBUG_EXECUTE_IF("array_func.array_contains", { + auto req_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>( + "array_func.array_contains", "req_id", 0); + return 
Status::Error<ErrorCode::INTERNAL_ERROR>( + "{} has already execute inverted index req_id {} , should not execute expr " + "with rows: {}", + get_name(), req_id, input_rows_count); + }); return _execute_dispatch(block, arguments, result, input_rows_count); } diff --git a/be/src/vec/functions/array/function_arrays_overlap.h b/be/src/vec/functions/array/function_arrays_overlap.h index fac5264d027..a4152f15039 100644 --- a/be/src/vec/functions/array/function_arrays_overlap.h +++ b/be/src/vec/functions/array/function_arrays_overlap.h @@ -207,6 +207,14 @@ public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { + DBUG_EXECUTE_IF("array_func.arrays_overlap", { + auto req_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>( + "array_func.arrays_overlap", "req_id", 0); + return Status::Error<ErrorCode::INTERNAL_ERROR>( + "{} has already execute inverted index req_id {} , should not execute expr " + "with rows: {}", + get_name(), req_id, input_rows_count); + }); auto left_column = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); auto right_column = diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index df0edf8b159..c942b18ca98 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -509,13 +509,15 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo // 2. not A in (...) // 3. not A is null // 4. not A like XXX + // 5. 
not array_contains([xx, xx], xx) colBuilder.setNumNulls(0); Preconditions.checkArgument( child instanceof EqualPredicate || child instanceof InPredicate || child instanceof IsNull || child instanceof Like - || child instanceof Match, + || child instanceof Match + || child instanceof Function, "Not-predicate meet unexpected child: %s", child.toSql()); if (child instanceof Like) { rowCount = context.statistics.getRowCount() - childStats.getRowCount(); diff --git a/regression-test/data/inverted_index_p0/tai_estimate.csv b/regression-test/data/inverted_index_p0/tai_estimate.csv new file mode 100644 index 00000000000..394e8b36a69 --- /dev/null +++ b/regression-test/data/inverted_index_p0/tai_estimate.csv @@ -0,0 +1,149 @@ +2024-10-01|"123456"|['John Doe','Jane Smith'] +2024-10-02|"234567"|['Alice Johnson','Bob Lee'] +2024-10-03|"345678"|['Chris White','Dana Black'] +2024-10-04|"456789"|['Eve Green','Frank Blue'] +2024-10-05|"567890"|['Grace Yellow','Hank Red'] +2024-10-06|"678901"|['Ivy Gray','Jack Silver'] +2024-10-07|"789012"|['Karen Brown','Larry Orange'] +2024-10-08|"890123"|['Monica Purple','Nate Gold'] +2024-10-09|"901234"|['Olivia Pink','Peter Bronze'] +2024-10-10|"012345"|['Quinn Black','Rachel Cyan'] +2024-10-11|"112345"|['Steve Magenta','Tom Indigo'] +2024-10-12|"212345"|['Ursula Violet','Victor Maroon'] +2024-10-13|"312345"|['Wendy Lime','Xander Olive'] +2024-10-14|"412345"|['Yara Teal','Zach Plum'] +2024-10-15|"512345"|['Ada Amber','Ben Copper'] +2024-10-16|"612345"|['Cathy Crimson','Dan Saffron'] +2024-10-17|"712345"|['Eliot Coral','Fiona Burgundy'] +2024-10-18|"812345"|['George Navy','Holly Peach'] +2024-10-19|"912345"|['Irene Lavender','Jake Chartreuse'] +2024-10-20|"013456"|['Karl Slate','Laura Fuchsia'] +2024-10-21|"113456"|['Matt Cyan','Nancy Azure'] +2024-10-22|"213456"|['Oscar Beige','Pam Lilac'] +2024-10-23|"313456"|['Quincy Rose','Rita Steel'] +2024-10-24|"413456"|['Sam Mint','Tina Lemon'] +2024-10-25|"513456"|['Ugo Brass','Vera Aquamarine'] 
+2024-10-26|"613456"|['Walt Jade','Xena Amethyst'] +2024-10-27|"713456"|['Yuri Copper','Zoe Emerald'] +2024-10-28|"813456"|['Alan Graphite','Bea Indigo'] +2024-10-29|"913456"|['Carl Magenta','Dina Quartz'] +2024-10-30|"023456"|['Evan Ruby','Faith Scarlet'] +2024-10-31|"123456"|['Greg Topaz','Helen Ivory'] +2024-11-01|"223456"|['Ian Tan','Jane Garnet'] +2024-11-02|"323456"|['Kyle Pearl','Lily Denim'] +2024-11-03|"423456"|['Mark Bronze','Nina Bronze'] +2024-11-04|"523456"|['Oscar Citrine','Paula Peridot'] +2024-11-05|"623456"|['Quinn Jasper','Rita Amber'] +2024-11-06|"723456"|['Steve Zircon','Tina Opal'] +2024-11-07|"823456"|['Uma Amber','Vera Turquoise'] +2024-11-08|"923456"|['Wendy Crystal','Xander Ivory'] +2024-11-09|"033456"|['Yara Onyx','Zach Emerald'] +2024-11-10|"133456"|['Alan Ruby','Bea Gold'] +2024-11-11|"233456"|['Cathy Diamond','Dan Garnet'] +2024-11-12|"333456"|['Eliot Sapphire','Fiona Sapphire'] +2024-11-13|"433456"|['George Jade','Holly Pearl'] +2024-11-14|"533456"|['Ivy Topaz','Jake Bronze'] +2024-11-15|"633456"|['Karl Pearl','Laura Ivory'] +2024-11-16|"733456"|['Matt Gold','Nancy Silver'] +2024-11-17|"833456"|['Oscar Ivory','Pam Onyx'] +2024-11-18|"933456"|['Amory Wang','Being Committer'] +2024-11-19|"043456"|['Sam Garnet','Tina Crystal'] +2024-11-20|"143456"|['Ugo Jasper','Vera Sapphire'] +2024-11-21|"243456"|['Walt Sapphire','Xena Opal'] +2024-11-22|"343456"|['Yuri Emerald','Zoe Gold'] +2024-11-23|"443456"|['Alan Jade','Bea Pearl'] +2024-11-24|"543456"|['Cathy Opal','Dan Ivory'] +2024-11-25|"643456"|['Eliot Bronze','Fiona Ruby'] +2024-11-26|"743456"|['George Ivory','Holly Jade'] +2024-11-27|"843456"|['Irene Sapphire','Jake Bronze'] +2024-11-28|"943456"|['Karl Emerald','Laura Topaz'] +2024-11-29|"053456"|['Matt Ivory','Nancy Ruby'] +2024-11-30|"153456"|['Oscar Bronze','Pam Sapphire'] +2024-12-01|"253456"|['Quinn Pearl','Rita Emerald'] +2024-12-02|"353456"|['Sam Ruby','Tina Ivory'] +2024-12-03|"453456"|['Ugo Onyx','Vera Pearl'] 
+2024-12-04|"553456"|['Walt Topaz','Xena Gold'] +2024-12-05|"653456"|['Yuri Bronze','Zoe Ruby'] +2024-12-06|"753456"|['Alan Sapphire','Bea Garnet'] +2024-12-07|"853456"|['Cathy Emerald','Dan Ruby'] +2024-12-08|"953456"|['Eliot Sapphire','Fiona Pearl'] +2024-12-09|"063456"|['George Gold','Holly Sapphire'] +2024-12-10|"163456"|['Irene Bronze','Jake Emerald'] +2024-12-11|"263456"|['Karl Ruby','Laura Onyx'] +2024-12-12|"363456"|['Matt Pearl','Nancy Ivory'] +2024-12-13|"463456"|['Oscar Topaz','Pam Gold'] +2024-12-14|"563456"|['Quinn Ruby','Rita Sapphire'] +2024-12-15|"663456"|['Sam Garnet','Tina Pearl'] +2024-12-16|"763456"|['Ugo Jade','Vera Bronze'] +2024-12-17|"863456"|['Walt Ruby','Xena Emerald'] +2024-12-18|"963456"|['Yuri Pearl','Zoe Onyx'] +2024-12-19|"073456"|['Adam Jade','Bella Onyx'] +2024-12-20|"173456"|['Cody Ruby','Diana Pearl'] +2024-12-21|"273456"|['Eliza Bronze','Frank Sapphire'] +2024-12-22|"373456"|['Gina Emerald','Hank Gold'] +2024-12-23|"473456"|['Isaac Pearl','Julia Ruby'] +2024-12-24|"573456"|['Kyle Onyx','Luna Ivory'] +2024-12-25|"673456"|['Mona Ruby','Nick Emerald'] +2024-12-26|"773456"|['Olga Sapphire','Paul Topaz'] +2024-12-27|"873456"|['Quincy Ivory','Rachel Garnet'] +2024-12-28|"973456"|['Steve Onyx','Tina Sapphire'] +2024-12-29|"083456"|['Uma Ruby','Victor Pearl'] +2024-12-30|"183456"|['Wendy Topaz','Xander Bronze'] +2024-12-31|"283456"|['Yara Emerald','Zane Ruby'] +2025-01-01|"383456"|['Alan Sapphire','Bea Garnet'] +2025-01-02|"483456"|['Cathy Ruby','Dan Onyx'] +2025-01-03|"583456"|['Eliot Pearl','Fiona Topaz'] +2025-01-04|"683456"|['George Sapphire','Holly Emerald'] +2025-01-05|"783456"|['Isaac Bronze','Julia Topaz'] +2025-01-06|"883456"|['Karl Onyx','Laura Sapphire'] +2025-01-07|"983456"|['Matt Ruby','Nancy Garnet'] +2025-01-08|"093456"|['Oscar Emerald','Pam Onyx'] +2025-01-09|"193456"|['Quinn Ruby','Rita Pearl'] +2025-01-10|"293456"|['Sam Sapphire','Tina Garnet'] +2025-01-11|"393456"|['Ugo Onyx','Vera Ruby'] +2025-01-12|"493456"|['Walt 
Topaz','Xena Sapphire'] +2025-01-13|"593456"|['Yuri Garnet','Zoe Onyx'] +2025-01-14|"693456"|['Adam Ruby','Bella Pearl'] +2025-01-15|"793456"|['Cody Sapphire','Diana Emerald'] +2025-01-16|"893456"|['Eliza Ruby','Frank Pearl'] +2025-01-17|"993456"|['Gina Onyx','Hank Garnet'] +2025-01-18|"103456"|['Isaac Sapphire','Julia Ruby'] +2025-01-19|"203456"|['Kyle Topaz','Luna Emerald'] +2025-01-20|"303456"|['Mona Ruby','Nick Pearl'] +2025-01-21|"403456"|['Olga Garnet','Paul Onyx'] +2025-01-22|"503456"|['Quincy Emerald','Rachel Sapphire'] +2025-01-23|"603456"|['Steve Ruby','Tina Pearl'] +2025-01-24|"703456"|['Uma Garnet','Victor Onyx'] +2025-01-25|"803456"|['Wendy Ruby','Xander Pearl'] +2025-01-26|"903456"|['Yara Sapphire','Zane Emerald'] +2025-01-27|"113456"|['Alan Onyx','Bea Garnet'] +2025-01-28|"213456"|['Cathy Sapphire','Dan Ruby'] +2025-01-29|"313456"|['Eliot Emerald','Fiona Pearl'] +2025-01-30|"413456"|['George Garnet','Holly Onyx'] +2025-01-31|"513456"|['Isaac Ruby','Julia Sapphire'] +2025-02-01|"613456"|['Karl Pearl','Laura Emerald'] +2025-02-02|"713456"|['Matt Onyx','Nancy Ruby'] +2025-02-03|"813456"|['Oscar Sapphire','Pam Pearl'] +2025-02-04|"913456"|['Quinn Garnet','Rita Ruby'] +2025-02-05|"123456"|['Sam Onyx','Tina Emerald'] +2025-02-06|"223456"|['Ugo Sapphire','Vera Pearl'] +2025-02-07|"323456"|['Walt Ruby','Xena Onyx'] +2025-02-08|"423456"|['Yuri Emerald','Zoe Sapphire'] +2025-02-09|"523456"|['Adam Onyx','Bella Garnet'] +2025-02-10|"623456"|['Cody Ruby','Diana Sapphire'] +2025-02-11|"723456"|['Eliza Emerald','Frank Pearl'] +2025-02-12|"823456"|['Gina Ruby','Hank Sapphire'] +2025-02-13|"923456"|['Isaac Onyx','Julia Garnet'] +2025-02-14|"133456"|['Kyle Ruby','Luna Pearl'] +2025-02-15|"233456"|['Mona Emerald','Nick Sapphire'] +2025-02-16|"333456"|['Olga Ruby','Paul Garnet'] +2025-02-17|"433456"|['Quincy Pearl','Rachel Sapphire'] +2025-02-18|"533456"|['Steve Ruby','Tina Garnet'] +2025-02-19|"633456"|['Uma Onyx','Victor Pearl'] +2025-02-20|"733456"|['Wendy 
Sapphire','Xander Ruby'] +2025-02-21|"833456"|['Yara Garnet','Zane Onyx'] +2025-02-22|"933456"|['Alan Ruby','Bea Pearl'] +2025-02-23|"143456"|['Cathy Sapphire','Dan Onyx'] +2025-02-24|"243456"|['Eliot Ruby','Fiona Emerald'] +2025-02-25|"343456"|['George Pearl','Holly Sapphire'] +2025-02-26|"443456"|['Isaac Onyx','Julia Ruby'] \ No newline at end of file diff --git a/regression-test/data/inverted_index_p0/test_array_contains_estimate.out b/regression-test/data/inverted_index_p0/test_array_contains_estimate.out new file mode 100644 index 00000000000..8382a2f834f --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_array_contains_estimate.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +148 + diff --git a/regression-test/suites/inverted_index_p0/test_array_contains_estimate.groovy b/regression-test/suites/inverted_index_p0/test_array_contains_estimate.groovy new file mode 100644 index 00000000000..09f2a7e891c --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_array_contains_estimate.groovy @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_array_contains_estimate", "nonConcurrent"){ + // prepare test table + def indexTblName = "tai_estimate" + def dataFile = "tai_estimate.csv" + + sql """ set enable_common_expr_pushdown = true; """ + sql """ set enable_profile = true;""" + sql """ set enable_inverted_index_query=true; """ + sql """ set inverted_index_skip_threshold = 0; """ // set skip threshold to 0 + + sql "DROP TABLE IF EXISTS ${indexTblName}" + // create 1 replica table + sql """ + CREATE TABLE IF NOT EXISTS `${indexTblName}` ( + `apply_date` date NULL COMMENT '', + `id` varchar(60) NOT NULL COMMENT '', + `inventors` array<text> NULL COMMENT '', + INDEX index_inverted_inventors(inventors) USING INVERTED COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`apply_date`, `id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + + streamLoad { + table indexTblName + + file dataFile // import csv file + time 10000 // limit inflight 10s + set 'column_separator', '|' + set 'strict_mode', 'true' + + // if declared a check callback, the default check condition will ignore. 
+ // So you must check all condition + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals(149, json.NumberTotalRows) + assertEquals(149, json.NumberLoadedRows) + assertTrue(json.LoadBytes > 0) + } + } + + // test array_contains estimate + def create_sql = { + List<String> list = new ArrayList<>() + list.add("select count() from ${indexTblName} where apply_date = "2024-11-18" or !array_contains(inventors, 'Amory Wang')") + return list; + } + + def checkpoints_name = "array_func.array_contains" + def execute_sql = { sqlList -> + def i = 0 + for (sqlStr in sqlList) { + try { + log.info("execute sql: i") + GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, [req_id: i]) + order_qt_sql """ ${sqlStr} """ + } finally { + GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name) + } + ++i + } + } + +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org