This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new e2bb86e7f80 [fix](inverted index) fixed in_list condition not indexed 
on pipelinex (#38178)
e2bb86e7f80 is described below

commit e2bb86e7f806f6e14463357644b749f06d3c1e59
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Thu Jul 25 14:42:34 2024 +0800

    [fix](inverted index) fixed in_list condition not indexed on pipelinex 
(#38178)
    
    ## Proposed changes
    
    https://github.com/apache/doris/pull/36565
    https://github.com/apache/doris/pull/37842
    https://github.com/apache/doris/pull/37921
    https://github.com/apache/doris/pull/37386
    
    <!--Describe your changes.-->
---
 be/src/exec/olap_utils.h                           |   4 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  14 ++
 be/src/pipeline/exec/scan_operator.cpp             |  92 ++++++++----
 be/src/pipeline/exec/scan_operator.h               |  23 ++-
 be/src/vec/exec/scan/vscan_node.cpp                |   3 +-
 .../test_index_inlist_fault_injection.out          |  67 +++++++++
 .../data/inverted_index_p0/test_index_rqg_bug4.out |  18 +++
 .../test_index_inlist_fault_injection.groovy       | 124 ++++++++++++++++
 .../inverted_index_p0/test_index_rqg_bug2.groovy   |   2 +-
 .../inverted_index_p0/test_index_rqg_bug4.groovy   | 162 +++++++++++++++++++++
 10 files changed, 468 insertions(+), 41 deletions(-)

diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h
index d1a1be81f5d..ddf8562fea1 100644
--- a/be/src/exec/olap_utils.h
+++ b/be/src/exec/olap_utils.h
@@ -117,9 +117,9 @@ inline SQLFilterOp to_olap_filter_type(const std::string& 
function_name, bool op
         return opposite ? FILTER_NOT_IN : FILTER_IN;
     } else if (function_name == "ne") {
         return opposite ? FILTER_IN : FILTER_NOT_IN;
-    } else if (function_name == "in_list") {
+    } else if (function_name == "in") {
         return opposite ? FILTER_NOT_IN : FILTER_IN;
-    } else if (function_name == "not_in_list") {
+    } else if (function_name == "not_in") {
         return opposite ? FILTER_IN : FILTER_NOT_IN;
     } else {
         DCHECK(false) << "Function Name: " << function_name;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index b53949b746c..49616c63e5c 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -2342,6 +2342,15 @@ Status 
SegmentIterator::_next_batch_internal(vectorized::Block* block) {
         return Status::EndOfFile("no more data in segment");
     }
 
+    DBUG_EXECUTE_IF("segment_iterator._rowid_result_for_index", {
+        for (auto& iter : _rowid_result_for_index) {
+            if (iter.second.first) {
+                return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                        "_rowid_result_for_index exists true");
+            }
+        }
+    })
+
     if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) {
         if (_non_predicate_columns.empty()) {
             return Status::InternalError("_non_predicate_columns is empty");
@@ -2731,6 +2740,11 @@ void 
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
     } else if (_is_literal_node(node_type)) {
         auto v_literal_expr = static_cast<const 
doris::vectorized::VLiteral*>(expr.get());
         _column_predicate_info->query_values.insert(v_literal_expr->value());
+    } else if (node_type == TExprNodeType::NULL_LITERAL) {
+        if (!_column_predicate_info->column_name.empty()) {
+            auto v_literal_expr = static_cast<const 
doris::vectorized::VLiteral*>(expr.get());
+            
_column_predicate_info->query_values.insert(v_literal_expr->value());
+        }
     } else {
         if (node_type == TExprNodeType::MATCH_PRED) {
             _column_predicate_info->query_op = "match";
diff --git a/be/src/pipeline/exec/scan_operator.cpp 
b/be/src/pipeline/exec/scan_operator.cpp
index 385517fbeaf..7322b085564 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -1034,8 +1034,10 @@ void 
ScanLocalState<Derived>::_normalize_compound_predicate(
         auto compound_fn_name = expr->fn().name.function_name;
         auto children_num = expr->children().size();
         for (auto i = 0; i < children_num; ++i) {
-            auto child_expr = expr->children()[i].get();
-            if (TExprNodeType::BINARY_PRED == child_expr->node_type()) {
+            auto* child_expr = expr->children()[i].get();
+            if (TExprNodeType::BINARY_PRED == child_expr->node_type() ||
+                TExprNodeType::IN_PRED == child_expr->node_type() ||
+                TExprNodeType::MATCH_PRED == child_expr->node_type()) {
                 SlotDescriptor* slot = nullptr;
                 ColumnValueRangeType* range_on_slot = nullptr;
                 if (_is_predicate_acting_on_slot(child_expr, 
in_predicate_checker, &slot,
@@ -1050,30 +1052,16 @@ void 
ScanLocalState<Derived>::_normalize_compound_predicate(
                                     value_range.mark_runtime_filter_predicate(
                                             _is_runtime_filter_predicate);
                                 }};
-                                
static_cast<void>(_normalize_binary_in_compound_predicate(
-                                        child_expr, expr_ctx, slot, 
value_range, pdt));
-                            },
-                            active_range);
-
-                    _compound_value_ranges.emplace_back(active_range);
-                }
-            } else if (TExprNodeType::MATCH_PRED == child_expr->node_type()) {
-                SlotDescriptor* slot = nullptr;
-                ColumnValueRangeType* range_on_slot = nullptr;
-                if (_is_predicate_acting_on_slot(child_expr, 
in_predicate_checker, &slot,
-                                                 &range_on_slot) ||
-                    _is_predicate_acting_on_slot(child_expr, 
eq_predicate_checker, &slot,
-                                                 &range_on_slot)) {
-                    ColumnValueRangeType active_range =
-                            *range_on_slot; // copy, in order not to affect 
the range in the _colname_to_value_range
-                    std::visit(
-                            [&](auto& value_range) {
-                                Defer mark_runtime_filter_flag {[&]() {
-                                    value_range.mark_runtime_filter_predicate(
-                                            _is_runtime_filter_predicate);
-                                }};
-                                
static_cast<void>(_normalize_match_in_compound_predicate(
-                                        child_expr, expr_ctx, slot, 
value_range, pdt));
+                                if (TExprNodeType::BINARY_PRED == 
child_expr->node_type()) {
+                                    
static_cast<void>(_normalize_binary_compound_predicate(
+                                            child_expr, expr_ctx, slot, 
value_range, pdt));
+                                } else if (TExprNodeType::IN_PRED == 
child_expr->node_type()) {
+                                    
static_cast<void>(_normalize_in_and_not_in_compound_predicate(
+                                            child_expr, expr_ctx, slot, 
value_range, pdt));
+                                } else {
+                                    
static_cast<void>(_normalize_match_compound_predicate(
+                                            child_expr, expr_ctx, slot, 
value_range, pdt));
+                                }
                             },
                             active_range);
 
@@ -1090,7 +1078,7 @@ void 
ScanLocalState<Derived>::_normalize_compound_predicate(
 
 template <typename Derived>
 template <PrimitiveType T>
-Status ScanLocalState<Derived>::_normalize_binary_in_compound_predicate(
+Status ScanLocalState<Derived>::_normalize_binary_compound_predicate(
         vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, 
SlotDescriptor* slot,
         ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
     DCHECK(expr->children().size() == 2);
@@ -1149,7 +1137,55 @@ Status 
ScanLocalState<Derived>::_normalize_binary_in_compound_predicate(
 
 template <typename Derived>
 template <PrimitiveType T>
-Status ScanLocalState<Derived>::_normalize_match_in_compound_predicate(
+Status ScanLocalState<Derived>::_normalize_in_and_not_in_compound_predicate(
+        vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, 
SlotDescriptor* slot,
+        ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
+    if (TExprNodeType::IN_PRED == expr->node_type()) {
+        std::string fn_name = expr->op() == TExprOpcode::type::FILTER_IN ? 
"in" : "not_in";
+
+        HybridSetBase::IteratorBase* iter = nullptr;
+        auto hybrid_set = expr->get_set_func();
+
+        if (hybrid_set != nullptr) {
+            *pdt = vectorized::VScanNode::PushDownType::UNACCEPTABLE;
+            return Status::OK();
+        } else {
+            auto* pred = static_cast<vectorized::VInPredicate*>(expr);
+
+            auto* state = reinterpret_cast<vectorized::InState*>(
+                    expr_ctx->fn_context(pred->fn_context_index())
+                            
->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+
+            if (!state->use_set) {
+                return Status::OK();
+            }
+
+            iter = state->hybrid_set->begin();
+
+            if (state->hybrid_set->contain_null()) {
+                *pdt = vectorized::VScanNode::PushDownType::UNACCEPTABLE;
+                return Status::OK();
+            }
+        }
+
+        while (iter->has_next()) {
+            if (nullptr == iter->get_value()) {
+                iter->next();
+                continue;
+            }
+            auto* value = const_cast<void*>(iter->get_value());
+            RETURN_IF_ERROR(_change_value_range<false>(
+                    range, value, 
ColumnValueRange<T>::add_compound_value_range, fn_name, 0));
+            iter->next();
+        }
+        *pdt = vectorized::VScanNode::PushDownType::ACCEPTABLE;
+    }
+    return Status::OK();
+}
+
+template <typename Derived>
+template <PrimitiveType T>
+Status ScanLocalState<Derived>::_normalize_match_compound_predicate(
         vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, 
SlotDescriptor* slot,
         ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
     DCHECK(expr->children().size() == 2);
diff --git a/be/src/pipeline/exec/scan_operator.h 
b/be/src/pipeline/exec/scan_operator.h
index 35a5d0c722a..4cdebeedc83 100644
--- a/be/src/pipeline/exec/scan_operator.h
+++ b/be/src/pipeline/exec/scan_operator.h
@@ -283,16 +283,23 @@ protected:
                                      vectorized::VExprSPtr&)>& 
eq_predicate_checker);
 
     template <PrimitiveType T>
-    Status _normalize_binary_in_compound_predicate(vectorized::VExpr* expr,
-                                                   vectorized::VExprContext* 
expr_ctx,
-                                                   SlotDescriptor* slot, 
ColumnValueRange<T>& range,
-                                                   
vectorized::VScanNode::PushDownType* pdt);
+    Status _normalize_binary_compound_predicate(vectorized::VExpr* expr,
+                                                vectorized::VExprContext* 
expr_ctx,
+                                                SlotDescriptor* slot, 
ColumnValueRange<T>& range,
+                                                
vectorized::VScanNode::PushDownType* pdt);
 
     template <PrimitiveType T>
-    Status _normalize_match_in_compound_predicate(vectorized::VExpr* expr,
-                                                  vectorized::VExprContext* 
expr_ctx,
-                                                  SlotDescriptor* slot, 
ColumnValueRange<T>& range,
-                                                  
vectorized::VScanNode::PushDownType* pdt);
+    Status _normalize_in_and_not_in_compound_predicate(vectorized::VExpr* expr,
+                                                       
vectorized::VExprContext* expr_ctx,
+                                                       SlotDescriptor* slot,
+                                                       ColumnValueRange<T>& 
range,
+                                                       
vectorized::VScanNode::PushDownType* pdt);
+
+    template <PrimitiveType T>
+    Status _normalize_match_compound_predicate(vectorized::VExpr* expr,
+                                               vectorized::VExprContext* 
expr_ctx,
+                                               SlotDescriptor* slot, 
ColumnValueRange<T>& range,
+                                               
vectorized::VScanNode::PushDownType* pdt);
 
     template <PrimitiveType T>
     Status _normalize_is_null_predicate(vectorized::VExpr* expr, 
vectorized::VExprContext* expr_ctx,
diff --git a/be/src/vec/exec/scan/vscan_node.cpp 
b/be/src/vec/exec/scan/vscan_node.cpp
index ed18150e66a..5a64287ce24 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -1138,8 +1138,7 @@ Status 
VScanNode::_normalize_in_and_not_in_compound_predicate(vectorized::VExpr*
                                                               
ColumnValueRange<T>& range,
                                                               PushDownType* 
pdt) {
     if (TExprNodeType::IN_PRED == expr->node_type()) {
-        std::string fn_name =
-                expr->op() == TExprOpcode::type::FILTER_IN ? "in_list" : 
"not_in_list";
+        std::string fn_name = expr->op() == TExprOpcode::type::FILTER_IN ? 
"in" : "not_in";
 
         HybridSetBase::IteratorBase* iter = nullptr;
         auto hybrid_set = expr->get_set_func();
diff --git 
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out 
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
new file mode 100644
index 00000000000..528b4008084
--- /dev/null
+++ 
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
@@ -0,0 +1,67 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+8
+
+-- !sql --
+996
+
+-- !sql --
+210
+
+-- !sql --
+8
+
+-- !sql --
+998
+
+-- !sql --
+208
+
+-- !sql --
+30
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+34
+
+-- !sql --
+2
+
+-- !sql --
+2
+
+-- !sql --
+3
+
+-- !sql --
+2
+
+-- !sql --
+30
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+34
+
+-- !sql --
+2
+
+-- !sql --
+2
+
+-- !sql --
+3
+
+-- !sql --
+2
+
diff --git a/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out 
b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
new file mode 100644
index 00000000000..784795cbbc1
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
@@ -0,0 +1,18 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+\N
+a
+b
+f
+h
+i
+j
+k
+l
+o
+p
+q
+v
+y
+z
+
diff --git 
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
 
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
new file mode 100644
index 00000000000..8d22c001ed0
--- /dev/null
+++ 
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
@@ -0,0 +1,124 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_index_inlist_fault_injection", "nonConcurrent") {
+    // define a sql table
+    def indexTbName = "test_index_inlist_fault_injection"
+
+    sql "DROP TABLE IF EXISTS ${indexTbName}"
+    sql """
+      CREATE TABLE ${indexTbName} (
+        `@timestamp` int(11) NULL COMMENT "",
+        `clientip` varchar(20) NULL COMMENT "",
+        `request` text NULL COMMENT "",
+        `status` int(11) NULL COMMENT "",
+        `size` int(11) NULL COMMENT "",
+        INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '',
+        INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = 
"english", "support_phrase" = "true") COMMENT '',
+        INDEX status_idx (`status`) USING INVERTED COMMENT ''
+      ) ENGINE=OLAP
+      DUPLICATE KEY(`@timestamp`)
+      COMMENT "OLAP"
+      DISTRIBUTED BY RANDOM BUCKETS 1
+      PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "disable_auto_compaction" = "true"
+      );
+    """
+
+    def load_httplogs_data = {table_name, label, read_flag, format_flag, 
file_name, ignore_failure=false,
+                        expected_succ_rows = -1, load_to_single_tablet = 
'true' ->
+
+        // load the json data
+        streamLoad {
+            table "${table_name}"
+
+            // set http request header params
+            set 'label', label + "_" + UUID.randomUUID().toString()
+            set 'read_json_by_line', read_flag
+            set 'format', format_flag
+            file file_name // import json file
+            time 10000 // limit inflight 10s
+            if (expected_succ_rows >= 0) {
+                set 'max_filter_ratio', '1'
+            }
+
+            // if declared a check callback, the default check condition will 
ignore.
+            // So you must check all condition
+            check { result, exception, startTime, endTime ->
+                       if (ignore_failure && expected_succ_rows < 0) { return }
+                    if (exception != null) {
+                        throw exception
+                    }
+                    log.info("Stream load result: ${result}".toString())
+                    def json = parseJson(result)
+            }
+        }
+    }
+
+    try {
+      load_httplogs_data.call(indexTbName, 
'test_index_inlist_fault_injection', 'true', 'json', 'documents-1000.json')
+
+      sql "sync"
+
+      try {
+        
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
+
+        sql """ set enable_common_expr_pushdown = true; """
+
+        qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ 
count() from ${indexTbName} where clientip in ('40.135.0.0', '232.0.0.0', 
'26.1.0.0'); """
+        qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ 
count() from ${indexTbName} where status in (1, 304, 200); """
+        qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ 
count() from ${indexTbName} where (request match 'hm' or clientip in 
('40.135.0.0', '232.0.0.0', '26.1.0.0')); """
+        qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ 
count() from ${indexTbName} where (request match 'hm' and clientip in 
('40.135.0.0', '232.0.0.0', '26.1.0.0')); """
+        qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ 
count() from ${indexTbName} where (request match 'hm' or status in (1, 304, 
200)); """
+        qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ 
count() from ${indexTbName} where (request match 'hm' and status in (1, 304, 
200)); """
+
+      } finally {
+        
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
+      }
+
+      try {
+        sql """ set enable_common_expr_pushdown = true; """
+
+        qt_sql """ select count() from ${indexTbName} where (clientip in 
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+        qt_sql """ select count() from ${indexTbName} where (clientip not in 
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') and clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') or clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') or clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') and clientip match '120.1.0.0'); """
+
+        qt_sql """ select count() from ${indexTbName} where (clientip = 
'2.1.0.0' and clientip = NULL or clientip = '40.135.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip = 
'2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """
+
+        sql """ set enable_common_expr_pushdown = false; """
+
+        qt_sql """ select count() from ${indexTbName} where (clientip in 
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+        qt_sql """ select count() from ${indexTbName} where (clientip not in 
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') and clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') or clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') or clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') and clientip match '120.1.0.0'); """
+
+        qt_sql """ select count() from ${indexTbName} where (clientip = 
'2.1.0.0' and clientip = NULL or clientip = '40.135.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip = 
'2.1.0.0' or clientip = NULL and clientip = '40.135.0.0'); """
+
+        sql """ set enable_common_expr_pushdown = true; """
+      } finally {
+      }
+    } finally {
+    }
+}
\ No newline at end of file
diff --git 
a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy 
b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
index bddbfdc4ab0..faede0bc3c1 100644
--- a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
@@ -14,7 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-suite("test_index_rqg_bug2", "test_index_rqg_bug2"){
+suite("test_index_rqg_bug2", "test_index_rqg_bug"){
     def table1 = "test_index_rqg_bug2"
 
     sql "drop table if exists ${table1}"
diff --git 
a/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy 
b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
new file mode 100644
index 00000000000..9353616f95f
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("test_index_rqg_bug4", "test_index_rqg_bug"){
+    def table1 = "test_index_rqg_bug4_table1"
+    def table2 = "test_index_rqg_bug4_table2"
+
+    sql "drop table if exists ${table1}"
+    sql "drop table if exists ${table2}"
+
+    sql """
+      create table ${table1} (
+      col_date_undef_signed_not_null_index_inverted date  not null  ,
+      col_bigint_undef_signed_not_null_index_inverted bigint  not null  ,
+      col_bigint_undef_signed_not_null bigint  not null  ,
+      col_int_undef_signed int  null  ,
+      col_int_undef_signed_index_inverted int  null  ,
+      col_int_undef_signed_not_null int  not null  ,
+      col_int_undef_signed_not_null_index_inverted int  not null  ,
+      col_bigint_undef_signed bigint  null  ,
+      col_bigint_undef_signed_index_inverted bigint  null  ,
+      col_date_undef_signed date  null  ,
+      col_date_undef_signed_index_inverted date  null  ,
+      col_date_undef_signed_not_null date  not null  ,
+      col_varchar_10__undef_signed varchar(10)  null  ,
+      col_varchar_10__undef_signed_index_inverted varchar(10)  null  ,
+      col_varchar_10__undef_signed_not_null varchar(10)  not null  ,
+      col_varchar_10__undef_signed_not_null_index_inverted varchar(10)  not 
null  ,
+      col_varchar_1024__undef_signed varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_index_inverted varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_not_null varchar(1024)  not null  ,
+      col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)  
not null  ,
+      pk int,
+      INDEX col_int_undef_signed_index_inverted_idx 
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_int_undef_signed_not_null_index_inverted_idx 
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_index_inverted_idx 
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_not_null_index_inverted_idx 
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_index_inverted_idx 
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_not_null_index_inverted_idx 
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_index_inverted_idx 
(`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx 
(`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_index_inverted_idx 
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx 
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+      ) engine=olap
+      UNIQUE KEY(col_date_undef_signed_not_null_index_inverted, 
col_bigint_undef_signed_not_null_index_inverted, 
col_bigint_undef_signed_not_null)
+      PARTITION BY             
RANGE(col_date_undef_signed_not_null_index_inverted) (
+                      FROM ('2023-12-09') TO ('2024-03-09') INTERVAL 1 DAY,
+                      FROM ('2025-02-16') TO ('2025-03-09') INTERVAL 1 DAY,
+                      FROM ('2025-06-18') TO ('2025-06-20') INTERVAL 1 DAY,
+                      FROM ('2026-01-01') TO ('2026-03-09') INTERVAL 1 DAY,
+                      FROM ('2027-01-01') TO ('2027-02-09') INTERVAL 1 DAY
+                  )
+      distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+      properties("enable_unique_key_merge_on_write" = "true", 
"replication_num" = "1");
+    """
+
+    sql """
+      create table ${table2} (
+      col_date_undef_signed_not_null date  not null  ,
+      col_bigint_undef_signed_not_null_index_inverted bigint  not null  ,
+      col_bigint_undef_signed_not_null bigint  not null  ,
+      col_int_undef_signed int  null  ,
+      col_int_undef_signed_index_inverted int  null  ,
+      col_int_undef_signed_not_null int  not null  ,
+      col_int_undef_signed_not_null_index_inverted int  not null  ,
+      col_bigint_undef_signed bigint  null  ,
+      col_bigint_undef_signed_index_inverted bigint  null  ,
+      col_date_undef_signed date  null  ,
+      col_date_undef_signed_index_inverted date  null  ,
+      col_date_undef_signed_not_null_index_inverted date  not null  ,
+      col_varchar_10__undef_signed varchar(10)  null  ,
+      col_varchar_10__undef_signed_index_inverted varchar(10)  null  ,
+      col_varchar_10__undef_signed_not_null varchar(10)  not null  ,
+      col_varchar_10__undef_signed_not_null_index_inverted varchar(10)  not 
null  ,
+      col_varchar_1024__undef_signed varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_index_inverted varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_not_null varchar(1024)  not null  ,
+      col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)  
not null  ,
+      pk int,
+      INDEX col_int_undef_signed_index_inverted_idx 
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_int_undef_signed_not_null_index_inverted_idx 
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_index_inverted_idx 
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_not_null_index_inverted_idx 
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_index_inverted_idx 
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_not_null_index_inverted_idx 
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_index_inverted_idx 
(`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx 
(`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_index_inverted_idx 
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx 
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+      ) engine=olap
+      UNIQUE KEY(col_date_undef_signed_not_null, 
col_bigint_undef_signed_not_null_index_inverted, 
col_bigint_undef_signed_not_null)
+      PARTITION BY             RANGE(col_date_undef_signed_not_null) (
+                      PARTITION p0 VALUES LESS THAN ('2023-12-11'),
+                      PARTITION p1 VALUES LESS THAN ('2023-12-15'),
+                      PARTITION p2 VALUES LESS THAN ('2023-12-16'),
+                      PARTITION p3 VALUES LESS THAN ('2023-12-25'),
+                      PARTITION p4 VALUES LESS THAN ('2024-01-18'),
+                      PARTITION p5 VALUES LESS THAN ('2026-02-18'),
+                      PARTITION p6 VALUES LESS THAN ('5024-02-18'),
+                      PARTITION p100 VALUES LESS THAN ('9999-12-31')
+                  )
+      distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+      properties("enable_unique_key_merge_on_write" = "true", 
"replication_num" = "1");
+    """
+
+    sql """
+      insert into 
${table1}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde
 [...]
+    """
+
+    sql """
+      insert into 
${table2}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde
 [...]
+    """
+
+    try {
+        sql "sync"
+
+        qt_sql """
+          select
+            table1.col_varchar_1024__undef_signed_not_null as field1
+          from
+            ${table1} as table1
+            right join ${table2} as table2 on (
+              table2.col_date_undef_signed_index_inverted = 
table1.col_date_undef_signed_index_inverted
+            )
+          where
+            not (
+              (
+                table2.`col_date_undef_signed_not_null_index_inverted` in (
+                  '2027-01-16',
+                  '2023-12-17',
+                  '2024-02-18',
+                  null,
+                  '2000-10-18',
+                  '2023-12-14',
+                  '2023-12-18'
+                )
+              )
+              and table2.`col_date_undef_signed_not_null_index_inverted` < 
'2025-06-18'
+            )
+          group by
+            field1
+          order by
+            field1
+          limit
+            10000;
+        """
+    } finally {
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to