This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new e2bb86e7f80 [fix](inverted index) fixed in_list condition not indexed on pipelinex (#38178)
e2bb86e7f80 is described below

commit e2bb86e7f806f6e14463357644b749f06d3c1e59
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Thu Jul 25 14:42:34 2024 +0800

    [fix](inverted index) fixed in_list condition not indexed on pipelinex (#38178)

    ## Proposed changes

    https://github.com/apache/doris/pull/36565
    https://github.com/apache/doris/pull/37842
    https://github.com/apache/doris/pull/37921
    https://github.com/apache/doris/pull/37386

    <!--Describe your changes.-->
---
 be/src/exec/olap_utils.h                           |   4 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  14 ++
 be/src/pipeline/exec/scan_operator.cpp             |  92 ++++++++----
 be/src/pipeline/exec/scan_operator.h               |  23 ++-
 be/src/vec/exec/scan/vscan_node.cpp                |   3 +-
 .../test_index_inlist_fault_injection.out          |  67 +++++++++
 .../data/inverted_index_p0/test_index_rqg_bug4.out |  18 +++
 .../test_index_inlist_fault_injection.groovy       | 124 ++++++++++++++++
 .../inverted_index_p0/test_index_rqg_bug2.groovy   |   2 +-
 .../inverted_index_p0/test_index_rqg_bug4.groovy   | 162 +++++++++++++++++++++
 10 files changed, 468 insertions(+), 41 deletions(-)

diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h
index d1a1be81f5d..ddf8562fea1 100644
--- a/be/src/exec/olap_utils.h
+++ b/be/src/exec/olap_utils.h
@@ -117,9 +117,9 @@ inline SQLFilterOp to_olap_filter_type(const std::string& function_name, bool op
         return opposite ? FILTER_NOT_IN : FILTER_IN;
     } else if (function_name == "ne") {
         return opposite ? FILTER_IN : FILTER_NOT_IN;
-    } else if (function_name == "in_list") {
+    } else if (function_name == "in") {
         return opposite ? FILTER_NOT_IN : FILTER_IN;
-    } else if (function_name == "not_in_list") {
+    } else if (function_name == "not_in") {
         return opposite ? FILTER_IN : FILTER_NOT_IN;
     } else {
         DCHECK(false) << "Function Name: " << function_name;
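
The hunk above teaches to_olap_filter_type to accept the function names "in" and "not_in", which is what the compound-predicate normalization further down now passes, instead of the older "in_list"/"not_in_list" spellings. A minimal standalone sketch of that mapping, with a simplified SQLFilterOp enum and function as stand-ins rather than the real olap_utils.h:

    // Standalone sketch of the name -> filter-op mapping exercised by the hunk above.
    // SQLFilterOp and to_olap_filter_type are simplified stand-ins, not the Doris headers.
    #include <cassert>
    #include <string>

    enum SQLFilterOp { FILTER_IN, FILTER_NOT_IN };

    SQLFilterOp to_olap_filter_type(const std::string& function_name, bool opposite) {
        if (function_name == "in") {
            return opposite ? FILTER_NOT_IN : FILTER_IN;
        }
        if (function_name == "not_in") {
            return opposite ? FILTER_IN : FILTER_NOT_IN;
        }
        assert(false && "unexpected function name");
        return FILTER_IN;
    }

    int main() {
        // The scan-node normalization now hands over "in"/"not_in" as fn_name.
        assert(to_olap_filter_type("in", /*opposite=*/false) == FILTER_IN);
        assert(to_olap_filter_type("not_in", /*opposite=*/false) == FILTER_NOT_IN);
        // `opposite` flips the operator, e.g. for a negated compound predicate.
        assert(to_olap_filter_type("in", /*opposite=*/true) == FILTER_NOT_IN);
        return 0;
    }
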
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index b53949b746c..49616c63e5c 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -2342,6 +2342,15 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
         return Status::EndOfFile("no more data in segment");
     }

+    DBUG_EXECUTE_IF("segment_iterator._rowid_result_for_index", {
+        for (auto& iter : _rowid_result_for_index) {
+            if (iter.second.first) {
+                return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                        "_rowid_result_for_index exists true");
+            }
+        }
+    })
+
     if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) {
         if (_non_predicate_columns.empty()) {
             return Status::InternalError("_non_predicate_columns is empty");
@@ -2731,6 +2740,11 @@ void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
     } else if (_is_literal_node(node_type)) {
         auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get());
         _column_predicate_info->query_values.insert(v_literal_expr->value());
+    } else if (node_type == TExprNodeType::NULL_LITERAL) {
+        if (!_column_predicate_info->column_name.empty()) {
+            auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get());
+            _column_predicate_info->query_values.insert(v_literal_expr->value());
+        }
     } else {
         if (node_type == TExprNodeType::MATCH_PRED) {
             _column_predicate_info->query_op = "match";
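
Two things change in the segment iterator: a debug point named "segment_iterator._rowid_result_for_index" that turns any entry of _rowid_result_for_index still flagged true into an internal error (the fault-injection suite below toggles it), and handling for TExprNodeType::NULL_LITERAL so that a NULL appearing in an in-list is recorded into query_values only once a column name has been bound. A rough sketch of that guard, using plain STL types as hypothetical stand-ins for the Doris structs:

    // Sketch of the NULL_LITERAL guard added above; ColumnPredicateInfo and the literal
    // node are replaced by plain STL stand-ins (a simplification, not Doris code).
    #include <iostream>
    #include <set>
    #include <string>

    struct ColumnPredicateInfo {
        std::string column_name;            // set once a slot-ref child has been visited
        std::set<std::string> query_values; // literal values collected for the predicate
    };

    // A NULL literal is only recorded when a column is already bound to the predicate info;
    // a bare NULL seen before any column reference is skipped.
    void record_literal(ColumnPredicateInfo& info, const std::string& value, bool is_null_literal) {
        if (is_null_literal && info.column_name.empty()) {
            return;
        }
        info.query_values.insert(is_null_literal ? "null" : value);
    }

    int main() {
        ColumnPredicateInfo info;
        record_literal(info, "", /*is_null_literal=*/true);   // dropped: no column bound yet
        info.column_name = "clientip";
        record_literal(info, "40.135.0.0", /*is_null_literal=*/false);
        record_literal(info, "", /*is_null_literal=*/true);   // kept: column is bound
        std::cout << info.query_values.size() << std::endl;   // prints 2
        return 0;
    }
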
diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp
index 385517fbeaf..7322b085564 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -1034,8 +1034,10 @@ void ScanLocalState<Derived>::_normalize_compound_predicate(
     auto compound_fn_name = expr->fn().name.function_name;
     auto children_num = expr->children().size();
     for (auto i = 0; i < children_num; ++i) {
-        auto child_expr = expr->children()[i].get();
-        if (TExprNodeType::BINARY_PRED == child_expr->node_type()) {
+        auto* child_expr = expr->children()[i].get();
+        if (TExprNodeType::BINARY_PRED == child_expr->node_type() ||
+            TExprNodeType::IN_PRED == child_expr->node_type() ||
+            TExprNodeType::MATCH_PRED == child_expr->node_type()) {
             SlotDescriptor* slot = nullptr;
             ColumnValueRangeType* range_on_slot = nullptr;
             if (_is_predicate_acting_on_slot(child_expr, in_predicate_checker, &slot,
@@ -1050,30 +1052,16 @@ void ScanLocalState<Derived>::_normalize_compound_predicate(
                             value_range.mark_runtime_filter_predicate(
                                     _is_runtime_filter_predicate);
                         }};
-                        static_cast<void>(_normalize_binary_in_compound_predicate(
-                                child_expr, expr_ctx, slot, value_range, pdt));
-                    },
-                    active_range);
-
-            _compound_value_ranges.emplace_back(active_range);
-        }
-    } else if (TExprNodeType::MATCH_PRED == child_expr->node_type()) {
-        SlotDescriptor* slot = nullptr;
-        ColumnValueRangeType* range_on_slot = nullptr;
-        if (_is_predicate_acting_on_slot(child_expr, in_predicate_checker, &slot,
-                                         &range_on_slot) ||
-            _is_predicate_acting_on_slot(child_expr, eq_predicate_checker, &slot,
-                                         &range_on_slot)) {
-            ColumnValueRangeType active_range =
-                    *range_on_slot; // copy, in order not to affect the range in the _colname_to_value_range
-            std::visit(
-                    [&](auto& value_range) {
-                        Defer mark_runtime_filter_flag {[&]() {
-                            value_range.mark_runtime_filter_predicate(
-                                    _is_runtime_filter_predicate);
-                        }};
-                        static_cast<void>(_normalize_match_in_compound_predicate(
-                                child_expr, expr_ctx, slot, value_range, pdt));
+                        if (TExprNodeType::BINARY_PRED == child_expr->node_type()) {
+                            static_cast<void>(_normalize_binary_compound_predicate(
+                                    child_expr, expr_ctx, slot, value_range, pdt));
+                        } else if (TExprNodeType::IN_PRED == child_expr->node_type()) {
+                            static_cast<void>(_normalize_in_and_not_in_compound_predicate(
+                                    child_expr, expr_ctx, slot, value_range, pdt));
+                        } else {
+                            static_cast<void>(_normalize_match_compound_predicate(
+                                    child_expr, expr_ctx, slot, value_range, pdt));
+                        }
                     },
                     active_range);

@@ -1090,7 +1078,7 @@ void ScanLocalState<Derived>::_normalize_compound_predicate(

 template <typename Derived>
 template <PrimitiveType T>
-Status ScanLocalState<Derived>::_normalize_binary_in_compound_predicate(
+Status ScanLocalState<Derived>::_normalize_binary_compound_predicate(
         vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, SlotDescriptor* slot,
         ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
     DCHECK(expr->children().size() == 2);
@@ -1149,7 +1137,55 @@ Status ScanLocalState<Derived>::_normalize_binary_in_compound_predicate(

 template <typename Derived>
 template <PrimitiveType T>
-Status ScanLocalState<Derived>::_normalize_match_in_compound_predicate(
+Status ScanLocalState<Derived>::_normalize_in_and_not_in_compound_predicate(
+        vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, SlotDescriptor* slot,
+        ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
+    if (TExprNodeType::IN_PRED == expr->node_type()) {
+        std::string fn_name = expr->op() == TExprOpcode::type::FILTER_IN ? "in" : "not_in";
+
+        HybridSetBase::IteratorBase* iter = nullptr;
+        auto hybrid_set = expr->get_set_func();
+
+        if (hybrid_set != nullptr) {
+            *pdt = vectorized::VScanNode::PushDownType::UNACCEPTABLE;
+            return Status::OK();
+        } else {
+            auto* pred = static_cast<vectorized::VInPredicate*>(expr);
+
+            auto* state = reinterpret_cast<vectorized::InState*>(
+                    expr_ctx->fn_context(pred->fn_context_index())
+                            ->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+
+            if (!state->use_set) {
+                return Status::OK();
+            }
+
+            iter = state->hybrid_set->begin();
+
+            if (state->hybrid_set->contain_null()) {
+                *pdt = vectorized::VScanNode::PushDownType::UNACCEPTABLE;
+                return Status::OK();
+            }
+        }
+
+        while (iter->has_next()) {
+            if (nullptr == iter->get_value()) {
+                iter->next();
+                continue;
+            }
+            auto* value = const_cast<void*>(iter->get_value());
+            RETURN_IF_ERROR(_change_value_range<false>(
+                    range, value, ColumnValueRange<T>::add_compound_value_range, fn_name, 0));
+            iter->next();
+        }
+        *pdt = vectorized::VScanNode::PushDownType::ACCEPTABLE;
+    }
+    return Status::OK();
+}
+
+template <typename Derived>
+template <PrimitiveType T>
+Status ScanLocalState<Derived>::_normalize_match_compound_predicate(
         vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, SlotDescriptor* slot,
         ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
     DCHECK(expr->children().size() == 2);
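
The new _normalize_in_and_not_in_compound_predicate walks the in-predicate's value set, forwards "in"/"not_in" as the function name, and refuses pushdown when the set contains NULL (the UNACCEPTABLE branch), otherwise feeding every non-null value into the compound value range. A compact sketch of just that accept/reject decision, with std::optional values standing in for the HybridSet iterator (hypothetical types, not the Doris API):

    // Sketch of the accept/reject decision in _normalize_in_and_not_in_compound_predicate.
    // std::optional<std::string> stands in for HybridSet entries; PushDownType and Range
    // are simplified stand-ins, not the Doris types.
    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    enum class PushDownType { ACCEPTABLE, UNACCEPTABLE };

    struct Range {
        std::vector<std::string> values; // stand-in for ColumnValueRange::add_compound_value_range
    };

    PushDownType normalize_in_list(const std::vector<std::optional<std::string>>& in_list,
                                   bool is_not_in, Range* range, std::string* fn_name) {
        *fn_name = is_not_in ? "not_in" : "in"; // forwarded like fn_name in the patch
        // A NULL inside the list makes the predicate three-valued; refuse pushdown.
        for (const auto& v : in_list) {
            if (!v.has_value()) {
                return PushDownType::UNACCEPTABLE;
            }
        }
        // Otherwise every value is folded into the compound value range.
        for (const auto& v : in_list) {
            range->values.push_back(*v);
        }
        return PushDownType::ACCEPTABLE;
    }

    int main() {
        Range range;
        std::string fn;
        std::vector<std::optional<std::string>> plain = {std::string("40.135.0.0"),
                                                         std::string("232.0.0.0")};
        std::vector<std::optional<std::string>> with_null = {std::string("40.135.0.0"),
                                                             std::nullopt};
        std::cout << (normalize_in_list(plain, false, &range, &fn) == PushDownType::ACCEPTABLE)
                  << " " << fn << " " << range.values.size() << std::endl; // 1 in 2
        std::cout << (normalize_in_list(with_null, true, &range, &fn) ==
                      PushDownType::UNACCEPTABLE)
                  << std::endl; // 1
        return 0;
    }

The patch additionally returns UNACCEPTABLE when expr->get_set_func() already yields a constant-folded set, and it also skips null entries inside the iteration loop itself; the sketch keeps only the contain_null-style early out.
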
diff --git a/be/src/pipeline/exec/scan_operator.h b/be/src/pipeline/exec/scan_operator.h
index 35a5d0c722a..4cdebeedc83 100644
--- a/be/src/pipeline/exec/scan_operator.h
+++ b/be/src/pipeline/exec/scan_operator.h
@@ -283,16 +283,23 @@ protected:
                     vectorized::VExprSPtr&)>& eq_predicate_checker);

     template <PrimitiveType T>
-    Status _normalize_binary_in_compound_predicate(vectorized::VExpr* expr,
-                                                   vectorized::VExprContext* expr_ctx,
-                                                   SlotDescriptor* slot, ColumnValueRange<T>& range,
-                                                   vectorized::VScanNode::PushDownType* pdt);
+    Status _normalize_binary_compound_predicate(vectorized::VExpr* expr,
+                                                vectorized::VExprContext* expr_ctx,
+                                                SlotDescriptor* slot, ColumnValueRange<T>& range,
+                                                vectorized::VScanNode::PushDownType* pdt);

     template <PrimitiveType T>
-    Status _normalize_match_in_compound_predicate(vectorized::VExpr* expr,
-                                                  vectorized::VExprContext* expr_ctx,
-                                                  SlotDescriptor* slot, ColumnValueRange<T>& range,
-                                                  vectorized::VScanNode::PushDownType* pdt);
+    Status _normalize_in_and_not_in_compound_predicate(vectorized::VExpr* expr,
+                                                       vectorized::VExprContext* expr_ctx,
+                                                       SlotDescriptor* slot,
+                                                       ColumnValueRange<T>& range,
+                                                       vectorized::VScanNode::PushDownType* pdt);
+
+    template <PrimitiveType T>
+    Status _normalize_match_compound_predicate(vectorized::VExpr* expr,
+                                               vectorized::VExprContext* expr_ctx,
+                                               SlotDescriptor* slot, ColumnValueRange<T>& range,
+                                               vectorized::VScanNode::PushDownType* pdt);

     template <PrimitiveType T>
     Status _normalize_is_null_predicate(vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx,
diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp
index ed18150e66a..5a64287ce24 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -1138,8 +1138,7 @@ Status VScanNode::_normalize_in_and_not_in_compound_predicate(vectorized::VExpr*
                                                               ColumnValueRange<T>& range,
                                                               PushDownType* pdt) {
     if (TExprNodeType::IN_PRED == expr->node_type()) {
-        std::string fn_name =
-                expr->op() == TExprOpcode::type::FILTER_IN ? "in_list" : "not_in_list";
+        std::string fn_name = expr->op() == TExprOpcode::type::FILTER_IN ? "in" : "not_in";

         HybridSetBase::IteratorBase* iter = nullptr;
         auto hybrid_set = expr->get_set_func();
diff --git a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
new file mode 100644
index 00000000000..528b4008084
--- /dev/null
+++ b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
@@ -0,0 +1,67 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+8
+
+-- !sql --
+996
+
+-- !sql --
+210
+
+-- !sql --
+8
+
+-- !sql --
+998
+
+-- !sql --
+208
+
+-- !sql --
+30
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+34
+
+-- !sql --
+2
+
+-- !sql --
+2
+
+-- !sql --
+3
+
+-- !sql --
+2
+
+-- !sql --
+30
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+34
+
+-- !sql --
+2
+
+-- !sql --
+2
+
+-- !sql --
+3
+
+-- !sql --
+2
+
diff --git a/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
new file mode 100644
index 00000000000..784795cbbc1
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
@@ -0,0 +1,18 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+\N
+a
+b
+f
+h
+i
+j
+k
+l
+o
+p
+q
+v
+y
+z
+
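
The expected-output files above pair with the suite below: it enables the "segment_iterator._rowid_result_for_index" debug point through GetDebugPoint().enableDebugPointForAllBEs(...) so the DBUG_EXECUTE_IF block added to segment_iterator.cpp can fail the scan, then runs in-list and match queries with inverted_index_skip_threshold = 0. A simplified stand-in for that kind of named debug hook (not Doris' actual DebugPoints implementation):

    // Simplified stand-in for a named debug point: a switch a test harness can flip so that a
    // hot path injects a failure, mirroring the shape of the DBUG_EXECUTE_IF block above.
    #include <functional>
    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <unordered_set>
    #include <utility>

    std::unordered_set<std::string> g_enabled_debug_points; // filled by the test harness

    void debug_execute_if(const std::string& name, const std::function<void()>& body) {
        if (g_enabled_debug_points.count(name) != 0) {
            body(); // only runs when the point has been enabled
        }
    }

    int main() {
        // Rough analog of _rowid_result_for_index: key -> (flag checked by the hook, payload).
        std::unordered_map<int, std::pair<bool, int>> rowid_result_for_index = {{1, {true, 42}}};

        g_enabled_debug_points.insert("segment_iterator._rowid_result_for_index");

        bool failed = false;
        debug_execute_if("segment_iterator._rowid_result_for_index", [&] {
            for (const auto& iter : rowid_result_for_index) {
                if (iter.second.first) {
                    failed = true; // the real hook returns an INTERNAL_ERROR status instead
                }
            }
        });
        std::cout << (failed ? "debug point fired" : "clean") << std::endl;
        return 0;
    }
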
diff --git a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
new file mode 100644
index 00000000000..8d22c001ed0
--- /dev/null
+++ b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
@@ -0,0 +1,124 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_index_inlist_fault_injection", "nonConcurrent") {
+    // define a sql table
+    def indexTbName = "test_index_inlist_fault_injection"
+
+    sql "DROP TABLE IF EXISTS ${indexTbName}"
+    sql """
+      CREATE TABLE ${indexTbName} (
+        `@timestamp` int(11) NULL COMMENT "",
+        `clientip` varchar(20) NULL COMMENT "",
+        `request` text NULL COMMENT "",
+        `status` int(11) NULL COMMENT "",
+        `size` int(11) NULL COMMENT "",
+        INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '',
+        INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '',
+        INDEX status_idx (`status`) USING INVERTED COMMENT ''
+      ) ENGINE=OLAP
+        DUPLICATE KEY(`@timestamp`)
+        COMMENT "OLAP"
+        DISTRIBUTED BY RANDOM BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "disable_auto_compaction" = "true"
+      );
+    """
+
+    def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
+                        expected_succ_rows = -1, load_to_single_tablet = 'true' ->
+
+        // load the json data
+        streamLoad {
+            table "${table_name}"
+
+            // set http request header params
+            set 'label', label + "_" + UUID.randomUUID().toString()
+            set 'read_json_by_line', read_flag
+            set 'format', format_flag
+            file file_name // import json file
+            time 10000 // limit inflight 10s
+            if (expected_succ_rows >= 0) {
+                set 'max_filter_ratio', '1'
+            }
+
+            // if declared a check callback, the default check condition will ignore.
+            // So you must check all condition
+            check { result, exception, startTime, endTime ->
+                if (ignore_failure && expected_succ_rows < 0) { return }
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def json = parseJson(result)
+            }
+        }
+    }
+
+    try {
+        load_httplogs_data.call(indexTbName, 'test_index_inlist_fault_injection', 'true', 'json', 'documents-1000.json')
+
+        sql "sync"
+
+        try {
+            GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
+
+            sql """ set enable_common_expr_pushdown = true; """
+
+            qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName} where clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0'); """
+            qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName} where status in (1, 304, 200); """
+            qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName} where (request match 'hm' or clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0')); """
+            qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName} where (request match 'hm' and clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0')); """
+            qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName} where (request match 'hm' or status in (1, 304, 200)); """
+            qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName} where (request match 'hm' and status in (1, 304, 200)); """
+
+        } finally {
+            GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
+        }
+
+        try {
+            sql """ set enable_common_expr_pushdown = true; """
+
+            qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+            qt_sql """ select count() from ${indexTbName} where (clientip not in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+            qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') and clientip match '120.1.0.0'); """
+            qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') or clientip match '120.1.0.0'); """
+            qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') or clientip match '120.1.0.0'); """
+            qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') and clientip match '120.1.0.0'); """
+
+            qt_sql """ select count() from ${indexTbName} where (clientip = '2.1.0.0' and clientip = NULL or clientip = '40.135.0.0'); """
+            qt_sql """ select count() from ${indexTbName} where (clientip = '2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """
+
+            sql """ set enable_common_expr_pushdown = false; """
+
+            qt_sql """ select count() from ${indexTbName} where (clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+            qt_sql """ select count() from ${indexTbName} where (clientip not in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+            qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') and clientip match '120.1.0.0'); """
+            qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') or clientip match '120.1.0.0'); """
+            qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') or clientip match '120.1.0.0'); """
+            qt_sql """ select count() from ${indexTbName} where (clientip match '2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0') and clientip match '120.1.0.0'); """
+
+            qt_sql """ select count() from ${indexTbName} where (clientip = '2.1.0.0' and clientip = NULL or clientip = '40.135.0.0'); """
+            qt_sql """ select count() from ${indexTbName} where (clientip = '2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """
+
+            sql """ set enable_common_expr_pushdown = true; """
+        } finally {
+        }
+    } finally {
+    }
+}
\ No newline at end of file
diff --git a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
index bddbfdc4ab0..faede0bc3c1 100644
--- a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
@@ -14,7 +14,7 @@
 // KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations
 // under the License.
-suite("test_index_rqg_bug2", "test_index_rqg_bug2"){
+suite("test_index_rqg_bug2", "test_index_rqg_bug"){
     def table1 = "test_index_rqg_bug2"

     sql "drop table if exists ${table1}"
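
test_index_rqg_bug4 below drives a NOT (col IN (..., null, ...) AND col < ...) predicate over inverted-index columns. Under SQL three-valued logic an in-list that contains NULL never evaluates to FALSE for a non-matching value, only to UNKNOWN, which lines up with the UNACCEPTABLE early-out in the normalization above. An illustrative evaluation of that semantics (plain C++, not Doris code):

    // Illustrative SQL three-valued evaluation of `x IN (..., NULL)`.
    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    // Returns true, false, or std::nullopt for UNKNOWN, following SQL semantics.
    std::optional<bool> sql_in(const std::string& x,
                               const std::vector<std::optional<std::string>>& list) {
        bool saw_null = false;
        for (const auto& v : list) {
            if (!v.has_value()) {
                saw_null = true;   // a NULL element can never prove a non-match
            } else if (*v == x) {
                return true;       // an exact match is still a definite TRUE
            }
        }
        return saw_null ? std::optional<bool>() : std::optional<bool>(false);
    }

    int main() {
        std::vector<std::optional<std::string>> list = {std::string("2023-12-17"), std::nullopt};
        auto hit = sql_in("2023-12-17", list);   // TRUE
        auto miss = sql_in("2024-02-18", list);  // UNKNOWN (not FALSE), so NOT(...) is UNKNOWN too
        std::cout << hit.value_or(false) << " " << miss.has_value() << std::endl; // prints "1 0"
        return 0;
    }
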
diff --git a/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
new file mode 100644
index 00000000000..9353616f95f
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("test_index_rqg_bug4", "test_index_rqg_bug"){
+    def table1 = "test_index_rqg_bug4_table1"
+    def table2 = "test_index_rqg_bug4_table2"
+
+    sql "drop table if exists ${table1}"
+    sql "drop table if exists ${table2}"
+
+    sql """
+      create table ${table1} (
+        col_date_undef_signed_not_null_index_inverted date not null ,
+        col_bigint_undef_signed_not_null_index_inverted bigint not null ,
+        col_bigint_undef_signed_not_null bigint not null ,
+        col_int_undef_signed int null ,
+        col_int_undef_signed_index_inverted int null ,
+        col_int_undef_signed_not_null int not null ,
+        col_int_undef_signed_not_null_index_inverted int not null ,
+        col_bigint_undef_signed bigint null ,
+        col_bigint_undef_signed_index_inverted bigint null ,
+        col_date_undef_signed date null ,
+        col_date_undef_signed_index_inverted date null ,
+        col_date_undef_signed_not_null date not null ,
+        col_varchar_10__undef_signed varchar(10) null ,
+        col_varchar_10__undef_signed_index_inverted varchar(10) null ,
+        col_varchar_10__undef_signed_not_null varchar(10) not null ,
+        col_varchar_10__undef_signed_not_null_index_inverted varchar(10) not null ,
+        col_varchar_1024__undef_signed varchar(1024) null ,
+        col_varchar_1024__undef_signed_index_inverted varchar(1024) null ,
+        col_varchar_1024__undef_signed_not_null varchar(1024) not null ,
+        col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024) not null ,
+        pk int,
+        INDEX col_int_undef_signed_index_inverted_idx (`col_int_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_int_undef_signed_not_null_index_inverted_idx (`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_bigint_undef_signed_index_inverted_idx (`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_bigint_undef_signed_not_null_index_inverted_idx (`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_date_undef_signed_index_inverted_idx (`col_date_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_date_undef_signed_not_null_index_inverted_idx (`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_varchar_10__undef_signed_index_inverted_idx (`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx (`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_varchar_1024__undef_signed_index_inverted_idx (`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx (`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+      ) engine=olap
+      UNIQUE KEY(col_date_undef_signed_not_null_index_inverted, col_bigint_undef_signed_not_null_index_inverted, col_bigint_undef_signed_not_null)
+      PARTITION BY RANGE(col_date_undef_signed_not_null_index_inverted) (
+              FROM ('2023-12-09') TO ('2024-03-09') INTERVAL 1 DAY,
+              FROM ('2025-02-16') TO ('2025-03-09') INTERVAL 1 DAY,
+              FROM ('2025-06-18') TO ('2025-06-20') INTERVAL 1 DAY,
+              FROM ('2026-01-01') TO ('2026-03-09') INTERVAL 1 DAY,
+              FROM ('2027-01-01') TO ('2027-02-09') INTERVAL 1 DAY
+      )
+      distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+      properties("enable_unique_key_merge_on_write" = "true", "replication_num" = "1");
+    """
+
+    sql """
+      create table ${table2} (
+        col_date_undef_signed_not_null date not null ,
+        col_bigint_undef_signed_not_null_index_inverted bigint not null ,
+        col_bigint_undef_signed_not_null bigint not null ,
+        col_int_undef_signed int null ,
+        col_int_undef_signed_index_inverted int null ,
+        col_int_undef_signed_not_null int not null ,
+        col_int_undef_signed_not_null_index_inverted int not null ,
+        col_bigint_undef_signed bigint null ,
+        col_bigint_undef_signed_index_inverted bigint null ,
+        col_date_undef_signed date null ,
+        col_date_undef_signed_index_inverted date null ,
+        col_date_undef_signed_not_null_index_inverted date not null ,
+        col_varchar_10__undef_signed varchar(10) null ,
+        col_varchar_10__undef_signed_index_inverted varchar(10) null ,
+        col_varchar_10__undef_signed_not_null varchar(10) not null ,
+        col_varchar_10__undef_signed_not_null_index_inverted varchar(10) not null ,
+        col_varchar_1024__undef_signed varchar(1024) null ,
+        col_varchar_1024__undef_signed_index_inverted varchar(1024) null ,
+        col_varchar_1024__undef_signed_not_null varchar(1024) not null ,
+        col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024) not null ,
+        pk int,
+        INDEX col_int_undef_signed_index_inverted_idx (`col_int_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_int_undef_signed_not_null_index_inverted_idx (`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_bigint_undef_signed_index_inverted_idx (`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_bigint_undef_signed_not_null_index_inverted_idx (`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_date_undef_signed_index_inverted_idx (`col_date_undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_date_undef_signed_not_null_index_inverted_idx (`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_varchar_10__undef_signed_index_inverted_idx (`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx (`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+        INDEX col_varchar_1024__undef_signed_index_inverted_idx (`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+        INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx (`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+      ) engine=olap
+      UNIQUE KEY(col_date_undef_signed_not_null, col_bigint_undef_signed_not_null_index_inverted, col_bigint_undef_signed_not_null)
+      PARTITION BY RANGE(col_date_undef_signed_not_null) (
+              PARTITION p0 VALUES LESS THAN ('2023-12-11'),
+              PARTITION p1 VALUES LESS THAN ('2023-12-15'),
+              PARTITION p2 VALUES LESS THAN ('2023-12-16'),
+              PARTITION p3 VALUES LESS THAN ('2023-12-25'),
+              PARTITION p4 VALUES LESS THAN ('2024-01-18'),
+              PARTITION p5 VALUES LESS THAN ('2026-02-18'),
+              PARTITION p6 VALUES LESS THAN ('5024-02-18'),
+              PARTITION p100 VALUES LESS THAN ('9999-12-31')
+      )
+      distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+      properties("enable_unique_key_merge_on_write" = "true", "replication_num" = "1");
+    """
+
+    sql """
+      insert into ${table1}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde [...]
+    """
+
+    sql """
+      insert into ${table2}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde [...]
+    """
+
+    try {
+        sql "sync"
+
+        qt_sql """
+          select
+            table1.col_varchar_1024__undef_signed_not_null as field1
+          from
+            ${table1} as table1
+            right join ${table2} as table2 on (
+              table2.col_date_undef_signed_index_inverted = table1.col_date_undef_signed_index_inverted
+            )
+          where
+            not (
+              (
+                table2.`col_date_undef_signed_not_null_index_inverted` in (
+                  '2027-01-16',
+                  '2023-12-17',
+                  '2024-02-18',
+                  null,
+                  '2000-10-18',
+                  '2023-12-14',
+                  '2023-12-18'
+                )
+              )
+              and table2.`col_date_undef_signed_not_null_index_inverted` < '2025-06-18'
+            )
+          group by
+            field1
+          order by
+            field1
+          limit
+            10000;
+        """
+    } finally {
+    }
+}

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org