This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 21e5374f5a2 [fix](inverted index) Error parsing NULL_LITERAL (#37842) 21e5374f5a2 is described below commit 21e5374f5a277c5ec9d05cb3727e95a612661e16 Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com> AuthorDate: Tue Jul 16 11:19:22 2024 +0800 [fix](inverted index) Error parsing NULL_LITERAL (#37842) ## Proposed changes 1. NULL_LITERAL and other LITERAL will enter the else process incorrectly 2. SELECT * FROM table_name WHERE column_name NOT IN (1, 2, NULL); The NOT IN operator returns an empty result set when the collection contains NULL. --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 6 +- be/src/pipeline/exec/scan_operator.cpp | 19 ++- .../test_index_inlist_fault_injection.out | 36 +++++ .../test_index_rqg_bug4.out} | 31 ++-- .../test_index_inlist_fault_injection.groovy | 23 +++ .../inverted_index_p0/test_index_rqg_bug2.groovy | 2 +- .../inverted_index_p0/test_index_rqg_bug4.groovy | 162 +++++++++++++++++++++ 7 files changed, 250 insertions(+), 29 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 6a66ae6fc14..a0b893bb24b 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -753,6 +753,7 @@ bool SegmentIterator::_is_literal_node(const TExprNodeType::type& node_type) { case TExprNodeType::DECIMAL_LITERAL: case TExprNodeType::STRING_LITERAL: case TExprNodeType::DATE_LITERAL: + case TExprNodeType::NULL_LITERAL: return true; default: return false; @@ -2811,7 +2812,8 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root( } else if (_is_literal_node(node_type)) { auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get()); _column_predicate_info->query_values.insert(v_literal_expr->value()); - } else { + } else if (node_type == TExprNodeType::BINARY_PRED || node_type == TExprNodeType::MATCH_PRED || + node_type == TExprNodeType::IN_PRED) { if (node_type == TExprNodeType::MATCH_PRED) { _column_predicate_info->query_op = "match"; } else if (node_type == TExprNodeType::IN_PRED) { @@ -2820,7 +2822,7 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root( } else { _column_predicate_info->query_op = "not_in"; } - } else if (node_type != TExprNodeType::COMPOUND_PRED) { + } else { _column_predicate_info->query_op = expr->fn().name.function_name; } diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index a72514fd0b1..c3fd0457d5e 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -1100,18 +1100,12 @@ Status ScanLocalState<Derived>::_normalize_in_and_not_in_compound_predicate( auto hybrid_set = expr->get_set_func(); if (hybrid_set != nullptr) { - if (hybrid_set->size() <= - _parent->cast<typename Derived::Parent>()._max_pushdown_conditions_per_column) { - iter = hybrid_set->begin(); - } else { - _filter_predicates.in_filters.emplace_back(slot->col_name(), expr->get_set_func()); - *pdt = PushDownType::ACCEPTABLE; - return Status::OK(); - } + *pdt = PushDownType::UNACCEPTABLE; + return Status::OK(); } else { - vectorized::VInPredicate* pred = static_cast<vectorized::VInPredicate*>(expr); + auto* pred = static_cast<vectorized::VInPredicate*>(expr); - vectorized::InState* state = reinterpret_cast<vectorized::InState*>( + auto* state = reinterpret_cast<vectorized::InState*>( expr_ctx->fn_context(pred->fn_context_index()) ->get_function_state(FunctionContext::FRAGMENT_LOCAL)); @@ -1120,6 +1114,11 @@ Status ScanLocalState<Derived>::_normalize_in_and_not_in_compound_predicate( } iter = state->hybrid_set->begin(); + + if (state->hybrid_set->contain_null()) { + *pdt = PushDownType::UNACCEPTABLE; + return Status::OK(); + } } while (iter->has_next()) { diff --git a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out index 9fbd1c8e252..7e9336a12c9 100644 --- a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out +++ b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out @@ -17,3 +17,39 @@ -- !sql -- 208 +-- !sql -- +30 + +-- !sql -- +0 + +-- !sql -- +0 + +-- !sql -- +34 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +30 + +-- !sql -- +0 + +-- !sql -- +0 + +-- !sql -- +34 + +-- !sql -- +2 + +-- !sql -- +2 + diff --git a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out similarity index 57% copy from regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out copy to regression-test/data/inverted_index_p0/test_index_rqg_bug4.out index 9fbd1c8e252..784795cbbc1 100644 --- a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out +++ b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out @@ -1,19 +1,18 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !sql -- -8 - --- !sql -- -996 - --- !sql -- -210 - --- !sql -- -8 - --- !sql -- -998 - --- !sql -- -208 +\N +a +b +f +h +i +j +k +l +o +p +q +v +y +z diff --git a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy index 52f9ed5c656..57ec94d9f99 100644 --- a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy +++ b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy @@ -90,6 +90,29 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") { } finally { GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._rowid_result_for_index") } + + try { + sql """ set enable_common_expr_pushdown = true; """ + + qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """ + qt_sql """ select count() from ${indexTbName} where (clientip not in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """ + qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') and clientip match '120.1.0.0'); """ + qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') or clientip match '120.1.0.0'); """ + qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') or clientip match '120.1.0.0'); """ + qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') and clientip match '120.1.0.0'); """ + + sql """ set enable_common_expr_pushdown = false; """ + + qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """ + qt_sql """ select count() from ${indexTbName} where (clientip not in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """ + qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') and clientip match '120.1.0.0'); """ + qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') or clientip match '120.1.0.0'); """ + qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') or clientip match '120.1.0.0'); """ + qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') and clientip match '120.1.0.0'); """ + + sql """ set enable_common_expr_pushdown = true; """ + } finally { + } } finally { } } \ No newline at end of file diff --git a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy index bddbfdc4ab0..faede0bc3c1 100644 --- a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy +++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy @@ -14,7 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -suite("test_index_rqg_bug2", "test_index_rqg_bug2"){ +suite("test_index_rqg_bug2", "test_index_rqg_bug"){ def table1 = "test_index_rqg_bug2" sql "drop table if exists ${table1}" diff --git a/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy new file mode 100644 index 00000000000..9353616f95f --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy @@ -0,0 +1,162 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +suite("test_index_rqg_bug4", "test_index_rqg_bug"){ + def table1 = "test_index_rqg_bug4_table1" + def table2 = "test_index_rqg_bug4_table2" + + sql "drop table if exists ${table1}" + sql "drop table if exists ${table2}" + + sql """ + create table ${table1} ( + col_date_undef_signed_not_null_index_inverted date not null , + col_bigint_undef_signed_not_null_index_inverted bigint not null , + col_bigint_undef_signed_not_null bigint not null , + col_int_undef_signed int null , + col_int_undef_signed_index_inverted int null , + col_int_undef_signed_not_null int not null , + col_int_undef_signed_not_null_index_inverted int not null , + col_bigint_undef_signed bigint null , + col_bigint_undef_signed_index_inverted bigint null , + col_date_undef_signed date null , + col_date_undef_signed_index_inverted date null , + col_date_undef_signed_not_null date not null , + col_varchar_10__undef_signed varchar(10) null , + col_varchar_10__undef_signed_index_inverted varchar(10) null , + col_varchar_10__undef_signed_not_null varchar(10) not null , + col_varchar_10__undef_signed_not_null_index_inverted varchar(10) not null , + col_varchar_1024__undef_signed varchar(1024) null , + col_varchar_1024__undef_signed_index_inverted varchar(1024) null , + col_varchar_1024__undef_signed_not_null varchar(1024) not null , + col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024) not null , + pk int, + INDEX col_int_undef_signed_index_inverted_idx (`col_int_undef_signed_index_inverted`) USING INVERTED, + INDEX col_int_undef_signed_not_null_index_inverted_idx (`col_int_undef_signed_not_null_index_inverted`) USING INVERTED, + INDEX col_bigint_undef_signed_index_inverted_idx (`col_bigint_undef_signed_index_inverted`) USING INVERTED, + INDEX col_bigint_undef_signed_not_null_index_inverted_idx (`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED, + INDEX col_date_undef_signed_index_inverted_idx (`col_date_undef_signed_index_inverted`) USING INVERTED, + INDEX col_date_undef_signed_not_null_index_inverted_idx (`col_date_undef_signed_not_null_index_inverted`) USING INVERTED, + INDEX col_varchar_10__undef_signed_index_inverted_idx (`col_varchar_10__undef_signed_index_inverted`) USING INVERTED, + INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx (`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED, + INDEX col_varchar_1024__undef_signed_index_inverted_idx (`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED, + INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx (`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED + ) engine=olap + UNIQUE KEY(col_date_undef_signed_not_null_index_inverted, col_bigint_undef_signed_not_null_index_inverted, col_bigint_undef_signed_not_null) + PARTITION BY RANGE(col_date_undef_signed_not_null_index_inverted) ( + FROM ('2023-12-09') TO ('2024-03-09') INTERVAL 1 DAY, + FROM ('2025-02-16') TO ('2025-03-09') INTERVAL 1 DAY, + FROM ('2025-06-18') TO ('2025-06-20') INTERVAL 1 DAY, + FROM ('2026-01-01') TO ('2026-03-09') INTERVAL 1 DAY, + FROM ('2027-01-01') TO ('2027-02-09') INTERVAL 1 DAY + ) + distributed by hash(col_bigint_undef_signed_not_null_index_inverted) + properties("enable_unique_key_merge_on_write" = "true", "replication_num" = "1"); + """ + + sql """ + create table ${table2} ( + col_date_undef_signed_not_null date not null , + col_bigint_undef_signed_not_null_index_inverted bigint not null , + col_bigint_undef_signed_not_null bigint not null , + col_int_undef_signed int null , + col_int_undef_signed_index_inverted int null , + col_int_undef_signed_not_null int not null , + col_int_undef_signed_not_null_index_inverted int not null , + col_bigint_undef_signed bigint null , + col_bigint_undef_signed_index_inverted bigint null , + col_date_undef_signed date null , + col_date_undef_signed_index_inverted date null , + col_date_undef_signed_not_null_index_inverted date not null , + col_varchar_10__undef_signed varchar(10) null , + col_varchar_10__undef_signed_index_inverted varchar(10) null , + col_varchar_10__undef_signed_not_null varchar(10) not null , + col_varchar_10__undef_signed_not_null_index_inverted varchar(10) not null , + col_varchar_1024__undef_signed varchar(1024) null , + col_varchar_1024__undef_signed_index_inverted varchar(1024) null , + col_varchar_1024__undef_signed_not_null varchar(1024) not null , + col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024) not null , + pk int, + INDEX col_int_undef_signed_index_inverted_idx (`col_int_undef_signed_index_inverted`) USING INVERTED, + INDEX col_int_undef_signed_not_null_index_inverted_idx (`col_int_undef_signed_not_null_index_inverted`) USING INVERTED, + INDEX col_bigint_undef_signed_index_inverted_idx (`col_bigint_undef_signed_index_inverted`) USING INVERTED, + INDEX col_bigint_undef_signed_not_null_index_inverted_idx (`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED, + INDEX col_date_undef_signed_index_inverted_idx (`col_date_undef_signed_index_inverted`) USING INVERTED, + INDEX col_date_undef_signed_not_null_index_inverted_idx (`col_date_undef_signed_not_null_index_inverted`) USING INVERTED, + INDEX col_varchar_10__undef_signed_index_inverted_idx (`col_varchar_10__undef_signed_index_inverted`) USING INVERTED, + INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx (`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED, + INDEX col_varchar_1024__undef_signed_index_inverted_idx (`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED, + INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx (`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED + ) engine=olap + UNIQUE KEY(col_date_undef_signed_not_null, col_bigint_undef_signed_not_null_index_inverted, col_bigint_undef_signed_not_null) + PARTITION BY RANGE(col_date_undef_signed_not_null) ( + PARTITION p0 VALUES LESS THAN ('2023-12-11'), + PARTITION p1 VALUES LESS THAN ('2023-12-15'), + PARTITION p2 VALUES LESS THAN ('2023-12-16'), + PARTITION p3 VALUES LESS THAN ('2023-12-25'), + PARTITION p4 VALUES LESS THAN ('2024-01-18'), + PARTITION p5 VALUES LESS THAN ('2026-02-18'), + PARTITION p6 VALUES LESS THAN ('5024-02-18'), + PARTITION p100 VALUES LESS THAN ('9999-12-31') + ) + distributed by hash(col_bigint_undef_signed_not_null_index_inverted) + properties("enable_unique_key_merge_on_write" = "true", "replication_num" = "1"); + """ + + sql """ + insert into ${table1}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde [...] + """ + + sql """ + insert into ${table2}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde [...] + """ + + try { + sql "sync" + + qt_sql """ + select + table1.col_varchar_1024__undef_signed_not_null as field1 + from + ${table1} as table1 + right join ${table2} as table2 on ( + table2.col_date_undef_signed_index_inverted = table1.col_date_undef_signed_index_inverted + ) + where + not ( + ( + table2.`col_date_undef_signed_not_null_index_inverted` in ( + '2027-01-16', + '2023-12-17', + '2024-02-18', + null, + '2000-10-18', + '2023-12-14', + '2023-12-18' + ) + ) + and table2.`col_date_undef_signed_not_null_index_inverted` < '2025-06-18' + ) + group by + field1 + order by + field1 + limit + 10000; + """ + } finally { + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org