This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 6de8e4be862 [fix] (nereids) fix Match Expression in filter estimation (#39050) (#39216) 6de8e4be862 is described below commit 6de8e4be8621f06ada8a3b560322cca363f431ba Author: Sun Chenyang <csun5...@gmail.com> AuthorDate: Tue Aug 13 10:55:41 2024 +0800 [fix] (nereids) fix Match Expression in filter estimation (#39050) (#39216) ## Proposed changes pick from master #39050 --- .../doris/nereids/stats/FilterEstimation.java | 7 ++- .../data/inverted_index_p0/test_or_not_match.out | 5 ++ .../inverted_index_p0/test_or_not_match.groovy | 69 ++++++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index 0ce10ec0c3c..b4b4fa5e3f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -34,6 +34,7 @@ import org.apache.doris.nereids.trees.expressions.IsNull; import org.apache.doris.nereids.trees.expressions.LessThan; import org.apache.doris.nereids.trees.expressions.LessThanEqual; import org.apache.doris.nereids.trees.expressions.Like; +import org.apache.doris.nereids.trees.expressions.Match; import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.NullSafeEqual; import org.apache.doris.nereids.trees.expressions.Or; @@ -480,7 +481,8 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo child instanceof EqualPredicate || child instanceof InPredicate || child instanceof IsNull - || child instanceof Like, + || child instanceof Like + || child instanceof Match, "Not-predicate meet unexpected child: %s", child.toSql()); if (child instanceof Like) { rowCount = context.statistics.getRowCount() - childStats.getRowCount(); @@ -503,6 
+505,9 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo .setMinExpr(originColStats.minExpr) .setMaxValue(originColStats.maxValue) .setMaxExpr(originColStats.maxExpr); + } else if (child instanceof Match) { + rowCount = context.statistics.getRowCount() - childStats.getRowCount(); + colBuilder.setNdv(Math.max(1.0, originColStats.ndv - childColStats.ndv)); } if (not.child().getInputSlots().size() == 1 && !(child instanceof IsNull)) { // only consider the single column numNull, otherwise, ignore diff --git a/regression-test/data/inverted_index_p0/test_or_not_match.out b/regression-test/data/inverted_index_p0/test_or_not_match.out new file mode 100644 index 00000000000..22dde7a8bf7 --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_or_not_match.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +GET / HTTP/1.0 +GET / HTTP/1.0 + diff --git a/regression-test/suites/inverted_index_p0/test_or_not_match.groovy b/regression-test/suites/inverted_index_p0/test_or_not_match.groovy new file mode 100644 index 00000000000..95af26480c9 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_or_not_match.groovy @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_or_not_match", "p0") { + def tableName = "test_or_not_match" + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE ${tableName} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` varchar(20) NULL COMMENT "", + `request` text NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + // load the json data + streamLoad { + table "${tableName}" + + // set http request header params + set 'read_json_by_line', 'true' + set 'format', 'json' + file 'documents-1000.json' // import json file + time 10000 // limit inflight 10s + + // if declared a check callback, the default check condition will ignore. 
+ // So you must check all condition + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + for (int i = 0; i < 10; i++) { + sql "select request from ${tableName} where request like '1.0' or not request MATCH 'GETA';" + } + + sql "set enable_nereids_planner = true" + sql "set enable_fallback_to_original_planner = false" + + qt_sql "select request from ${tableName} where request like '1.0' or not request MATCH 'GETA' order by request limit 2;" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org