This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 21e5374f5a2 [fix](inverted index) Error parsing NULL_LITERAL (#37842)
21e5374f5a2 is described below

commit 21e5374f5a277c5ec9d05cb3727e95a612661e16
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Tue Jul 16 11:19:22 2024 +0800

    [fix](inverted index) Error parsing NULL_LITERAL (#37842)
    
    ## Proposed changes
    
    1. NULL_LITERAL and other LITERAL will enter the else process
    incorrectly
    2. SELECT * FROM table_name WHERE column_name NOT IN (1, 2, NULL); The
    NOT IN operator returns an empty result set when the collection contains
    NULL.
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |   6 +-
 be/src/pipeline/exec/scan_operator.cpp             |  19 ++-
 .../test_index_inlist_fault_injection.out          |  36 +++++
 .../test_index_rqg_bug4.out}                       |  31 ++--
 .../test_index_inlist_fault_injection.groovy       |  23 +++
 .../inverted_index_p0/test_index_rqg_bug2.groovy   |   2 +-
 .../inverted_index_p0/test_index_rqg_bug4.groovy   | 162 +++++++++++++++++++++
 7 files changed, 250 insertions(+), 29 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 6a66ae6fc14..a0b893bb24b 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -753,6 +753,7 @@ bool SegmentIterator::_is_literal_node(const 
TExprNodeType::type& node_type) {
     case TExprNodeType::DECIMAL_LITERAL:
     case TExprNodeType::STRING_LITERAL:
     case TExprNodeType::DATE_LITERAL:
+    case TExprNodeType::NULL_LITERAL:
         return true;
     default:
         return false;
@@ -2811,7 +2812,8 @@ void 
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
     } else if (_is_literal_node(node_type)) {
         auto v_literal_expr = static_cast<const 
doris::vectorized::VLiteral*>(expr.get());
         _column_predicate_info->query_values.insert(v_literal_expr->value());
-    } else {
+    } else if (node_type == TExprNodeType::BINARY_PRED || node_type == 
TExprNodeType::MATCH_PRED ||
+               node_type == TExprNodeType::IN_PRED) {
         if (node_type == TExprNodeType::MATCH_PRED) {
             _column_predicate_info->query_op = "match";
         } else if (node_type == TExprNodeType::IN_PRED) {
@@ -2820,7 +2822,7 @@ void 
SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
             } else {
                 _column_predicate_info->query_op = "not_in";
             }
-        } else if (node_type != TExprNodeType::COMPOUND_PRED) {
+        } else {
             _column_predicate_info->query_op = expr->fn().name.function_name;
         }
 
diff --git a/be/src/pipeline/exec/scan_operator.cpp 
b/be/src/pipeline/exec/scan_operator.cpp
index a72514fd0b1..c3fd0457d5e 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -1100,18 +1100,12 @@ Status 
ScanLocalState<Derived>::_normalize_in_and_not_in_compound_predicate(
         auto hybrid_set = expr->get_set_func();
 
         if (hybrid_set != nullptr) {
-            if (hybrid_set->size() <=
-                _parent->cast<typename 
Derived::Parent>()._max_pushdown_conditions_per_column) {
-                iter = hybrid_set->begin();
-            } else {
-                _filter_predicates.in_filters.emplace_back(slot->col_name(), 
expr->get_set_func());
-                *pdt = PushDownType::ACCEPTABLE;
-                return Status::OK();
-            }
+            *pdt = PushDownType::UNACCEPTABLE;
+            return Status::OK();
         } else {
-            vectorized::VInPredicate* pred = 
static_cast<vectorized::VInPredicate*>(expr);
+            auto* pred = static_cast<vectorized::VInPredicate*>(expr);
 
-            vectorized::InState* state = 
reinterpret_cast<vectorized::InState*>(
+            auto* state = reinterpret_cast<vectorized::InState*>(
                     expr_ctx->fn_context(pred->fn_context_index())
                             
->get_function_state(FunctionContext::FRAGMENT_LOCAL));
 
@@ -1120,6 +1114,11 @@ Status 
ScanLocalState<Derived>::_normalize_in_and_not_in_compound_predicate(
             }
 
             iter = state->hybrid_set->begin();
+
+            if (state->hybrid_set->contain_null()) {
+                *pdt = PushDownType::UNACCEPTABLE;
+                return Status::OK();
+            }
         }
 
         while (iter->has_next()) {
diff --git 
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out 
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
index 9fbd1c8e252..7e9336a12c9 100644
--- 
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
+++ 
b/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
@@ -17,3 +17,39 @@
 -- !sql --
 208
 
+-- !sql --
+30
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+34
+
+-- !sql --
+2
+
+-- !sql --
+2
+
+-- !sql --
+30
+
+-- !sql --
+0
+
+-- !sql --
+0
+
+-- !sql --
+34
+
+-- !sql --
+2
+
+-- !sql --
+2
+
diff --git 
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out 
b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
similarity index 57%
copy from 
regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
copy to regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
index 9fbd1c8e252..784795cbbc1 100644
--- 
a/regression-test/data/fault_injection_p0/test_index_inlist_fault_injection.out
+++ b/regression-test/data/inverted_index_p0/test_index_rqg_bug4.out
@@ -1,19 +1,18 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !sql --
-8
-
--- !sql --
-996
-
--- !sql --
-210
-
--- !sql --
-8
-
--- !sql --
-998
-
--- !sql --
-208
+\N
+a
+b
+f
+h
+i
+j
+k
+l
+o
+p
+q
+v
+y
+z
 
diff --git 
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
 
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
index 52f9ed5c656..57ec94d9f99 100644
--- 
a/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
+++ 
b/regression-test/suites/fault_injection_p0/test_index_inlist_fault_injection.groovy
@@ -90,6 +90,29 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") {
       } finally {
         
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
       }
+
+      try {
+        sql """ set enable_common_expr_pushdown = true; """
+
+        qt_sql """ select count() from ${indexTbName} where (clientip in 
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+        qt_sql """ select count() from ${indexTbName} where (clientip not in 
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') and clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') or clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') or clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') and clientip match '120.1.0.0'); """
+
+        sql """ set enable_common_expr_pushdown = false; """
+
+        qt_sql """ select count() from ${indexTbName} where (clientip in 
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+        qt_sql """ select count() from ${indexTbName} where (clientip not in 
('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', '247.37.0.0')); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') and clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') or clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' and clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') or clientip match '120.1.0.0'); """
+        qt_sql """ select count() from ${indexTbName} where (clientip match 
'2.1.0.0' or clientip in ('40.135.0.0', '232.0.0.0', NULL, '26.1.0.0', 
'247.37.0.0') and clientip match '120.1.0.0'); """
+
+        sql """ set enable_common_expr_pushdown = true; """
+      } finally {
+      }
     } finally {
     }
 }
\ No newline at end of file
diff --git 
a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy 
b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
index bddbfdc4ab0..faede0bc3c1 100644
--- a/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug2.groovy
@@ -14,7 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-suite("test_index_rqg_bug2", "test_index_rqg_bug2"){
+suite("test_index_rqg_bug2", "test_index_rqg_bug"){
     def table1 = "test_index_rqg_bug2"
 
     sql "drop table if exists ${table1}"
diff --git 
a/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy 
b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
new file mode 100644
index 00000000000..9353616f95f
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_index_rqg_bug4.groovy
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("test_index_rqg_bug4", "test_index_rqg_bug"){
+    def table1 = "test_index_rqg_bug4_table1"
+    def table2 = "test_index_rqg_bug4_table2"
+
+    sql "drop table if exists ${table1}"
+    sql "drop table if exists ${table2}"
+
+    sql """
+      create table ${table1} (
+      col_date_undef_signed_not_null_index_inverted date  not null  ,
+      col_bigint_undef_signed_not_null_index_inverted bigint  not null  ,
+      col_bigint_undef_signed_not_null bigint  not null  ,
+      col_int_undef_signed int  null  ,
+      col_int_undef_signed_index_inverted int  null  ,
+      col_int_undef_signed_not_null int  not null  ,
+      col_int_undef_signed_not_null_index_inverted int  not null  ,
+      col_bigint_undef_signed bigint  null  ,
+      col_bigint_undef_signed_index_inverted bigint  null  ,
+      col_date_undef_signed date  null  ,
+      col_date_undef_signed_index_inverted date  null  ,
+      col_date_undef_signed_not_null date  not null  ,
+      col_varchar_10__undef_signed varchar(10)  null  ,
+      col_varchar_10__undef_signed_index_inverted varchar(10)  null  ,
+      col_varchar_10__undef_signed_not_null varchar(10)  not null  ,
+      col_varchar_10__undef_signed_not_null_index_inverted varchar(10)  not 
null  ,
+      col_varchar_1024__undef_signed varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_index_inverted varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_not_null varchar(1024)  not null  ,
+      col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)  
not null  ,
+      pk int,
+      INDEX col_int_undef_signed_index_inverted_idx 
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_int_undef_signed_not_null_index_inverted_idx 
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_index_inverted_idx 
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_not_null_index_inverted_idx 
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_index_inverted_idx 
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_not_null_index_inverted_idx 
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_index_inverted_idx 
(`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx 
(`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_index_inverted_idx 
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx 
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+      ) engine=olap
+      UNIQUE KEY(col_date_undef_signed_not_null_index_inverted, 
col_bigint_undef_signed_not_null_index_inverted, 
col_bigint_undef_signed_not_null)
+      PARTITION BY             
RANGE(col_date_undef_signed_not_null_index_inverted) (
+                      FROM ('2023-12-09') TO ('2024-03-09') INTERVAL 1 DAY,
+                      FROM ('2025-02-16') TO ('2025-03-09') INTERVAL 1 DAY,
+                      FROM ('2025-06-18') TO ('2025-06-20') INTERVAL 1 DAY,
+                      FROM ('2026-01-01') TO ('2026-03-09') INTERVAL 1 DAY,
+                      FROM ('2027-01-01') TO ('2027-02-09') INTERVAL 1 DAY
+                  )
+      distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+      properties("enable_unique_key_merge_on_write" = "true", 
"replication_num" = "1");
+    """
+
+    sql """
+      create table ${table2} (
+      col_date_undef_signed_not_null date  not null  ,
+      col_bigint_undef_signed_not_null_index_inverted bigint  not null  ,
+      col_bigint_undef_signed_not_null bigint  not null  ,
+      col_int_undef_signed int  null  ,
+      col_int_undef_signed_index_inverted int  null  ,
+      col_int_undef_signed_not_null int  not null  ,
+      col_int_undef_signed_not_null_index_inverted int  not null  ,
+      col_bigint_undef_signed bigint  null  ,
+      col_bigint_undef_signed_index_inverted bigint  null  ,
+      col_date_undef_signed date  null  ,
+      col_date_undef_signed_index_inverted date  null  ,
+      col_date_undef_signed_not_null_index_inverted date  not null  ,
+      col_varchar_10__undef_signed varchar(10)  null  ,
+      col_varchar_10__undef_signed_index_inverted varchar(10)  null  ,
+      col_varchar_10__undef_signed_not_null varchar(10)  not null  ,
+      col_varchar_10__undef_signed_not_null_index_inverted varchar(10)  not 
null  ,
+      col_varchar_1024__undef_signed varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_index_inverted varchar(1024)  null  ,
+      col_varchar_1024__undef_signed_not_null varchar(1024)  not null  ,
+      col_varchar_1024__undef_signed_not_null_index_inverted varchar(1024)  
not null  ,
+      pk int,
+      INDEX col_int_undef_signed_index_inverted_idx 
(`col_int_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_int_undef_signed_not_null_index_inverted_idx 
(`col_int_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_index_inverted_idx 
(`col_bigint_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_bigint_undef_signed_not_null_index_inverted_idx 
(`col_bigint_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_index_inverted_idx 
(`col_date_undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_date_undef_signed_not_null_index_inverted_idx 
(`col_date_undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_index_inverted_idx 
(`col_varchar_10__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_10__undef_signed_not_null_index_inverted_idx 
(`col_varchar_10__undef_signed_not_null_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_index_inverted_idx 
(`col_varchar_1024__undef_signed_index_inverted`) USING INVERTED,
+      INDEX col_varchar_1024__undef_signed_not_null_index_inverted_idx 
(`col_varchar_1024__undef_signed_not_null_index_inverted`) USING INVERTED
+      ) engine=olap
+      UNIQUE KEY(col_date_undef_signed_not_null, 
col_bigint_undef_signed_not_null_index_inverted, 
col_bigint_undef_signed_not_null)
+      PARTITION BY             RANGE(col_date_undef_signed_not_null) (
+                      PARTITION p0 VALUES LESS THAN ('2023-12-11'),
+                      PARTITION p1 VALUES LESS THAN ('2023-12-15'),
+                      PARTITION p2 VALUES LESS THAN ('2023-12-16'),
+                      PARTITION p3 VALUES LESS THAN ('2023-12-25'),
+                      PARTITION p4 VALUES LESS THAN ('2024-01-18'),
+                      PARTITION p5 VALUES LESS THAN ('2026-02-18'),
+                      PARTITION p6 VALUES LESS THAN ('5024-02-18'),
+                      PARTITION p100 VALUES LESS THAN ('9999-12-31')
+                  )
+      distributed by hash(col_bigint_undef_signed_not_null_index_inverted)
+      properties("enable_unique_key_merge_on_write" = "true", 
"replication_num" = "1");
+    """
+
+    sql """
+      insert into 
${table1}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde
 [...]
+    """
+
+    sql """
+      insert into 
${table2}(pk,col_int_undef_signed,col_int_undef_signed_index_inverted,col_int_undef_signed_not_null,col_int_undef_signed_not_null_index_inverted,col_bigint_undef_signed,col_bigint_undef_signed_index_inverted,col_bigint_undef_signed_not_null,col_bigint_undef_signed_not_null_index_inverted,col_date_undef_signed,col_date_undef_signed_index_inverted,col_date_undef_signed_not_null,col_date_undef_signed_not_null_index_inverted,col_varchar_10__undef_signed,col_varchar_10__unde
 [...]
+    """
+
+    try {
+        sql "sync"
+
+        qt_sql """
+          select
+            table1.col_varchar_1024__undef_signed_not_null as field1
+          from
+            ${table1} as table1
+            right join ${table2} as table2 on (
+              table2.col_date_undef_signed_index_inverted = 
table1.col_date_undef_signed_index_inverted
+            )
+          where
+            not (
+              (
+                table2.`col_date_undef_signed_not_null_index_inverted` in (
+                  '2027-01-16',
+                  '2023-12-17',
+                  '2024-02-18',
+                  null,
+                  '2000-10-18',
+                  '2023-12-14',
+                  '2023-12-18'
+                )
+              )
+              and table2.`col_date_undef_signed_not_null_index_inverted` < 
'2025-06-18'
+            )
+          group by
+            field1
+          order by
+            field1
+          limit
+            10000;
+        """
+    } finally {
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to