This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 7697f02b890 [fix](ub) undefined behavior in FixedContainer (#39191)
7697f02b890 is described below

commit 7697f02b890976523d851cfb5dd3a93adb0d2826
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Sun Aug 11 06:32:23 2024 +0800

    [fix](ub) undefined behavior in FixedContainer (#39191)
    
    ## Proposed changes
    
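    Undefined behavior occurs when the value list of an `IN` / `NOT IN`
    predicate contains a NULL literal (e.g. `s_nation in ('PERU', 'ETHIOPIA', null)`).
    For string columns the value set was sized with `get_num_args() - 1`, which
    still counts the NULL, instead of `get_size_with_out_null()`.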
    
    ```
    /root/doris/be/src/vec/common/string_ref.h:271:54: runtime error: null pointer passed as argument 2, which is declared to never be null
    /var/local/ldb-toolchain/bin/../usr/include/string.h:64:33: note: nonnull attribute specified here
    #0 0x5616d072245d in doris::StringRef::eq(doris::StringRef const&) const 
/root/doris/be/src/vec/common/string_ref.h:271:41
    #1 0x5616d072245d in doris::StringRef::operator==(doris::StringRef const&) 
const /root/doris/be/src/vec/common/string_ref.h:274:60
    #2 0x5616d072245d in doris::FixedContainer::find(doris::StringRef const&) 
const /root/doris/be/src/exprs/hybrid_set.h:76:36
    #3 0x5616d072245d in void 
doris::StringValueSet>::_find_batch(doris::vectorized::IColumn const&, unsigned 
long, doris::vectorized::PODArray, 16ul, 15ul> const*, 
doris::vectorized::PODArray, 16ul, 15ul>&) 
/root/doris/be/src/exprs/hybrid_set.h:688:63
    #4 0x5616d0747857 in 
doris::vectorized::FunctionIn::execute_impl(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) 
const /root/doris/be/src/vec/functions/in.h:170:21
    #5 0x5616c741fa3a in 
doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) 
const /root/doris/be/src/vec/functions/function.h:462:26
    #6 0x5616cbb5b650 in 
doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, 
bool) const /root/doris/be/src/vec/functions/function.cpp
    #7 0x5616cbb4e14e in 
doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, 
bool) const /root/doris/be/src/vec/functions/function.cpp:244:12
    #8 0x5616cbb4e3c2 in 
doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, 
bool) const /root/doris/be/src/vec/functions/function.cpp:250:12
    #9 0x5616c741cd68 in 
doris::vectorized::IFunctionBase::execute(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, 
bool) const /root/doris/be/src/vec/functions/function.h:190:19
    #10 0x5616c74cf712 in 
doris::vectorized::VInPredicate::execute(doris::vectorized::VExprContext*, 
doris::vectorized::Block*, int*) 
/root/doris/be/src/vec/exprs/vin_predicate.cpp:130:5
    #11 0x5616c740d5c0 in 
doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*,
 doris::vectorized::Block*, int*, std::vector>&) 
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:183:9
    #12 0x5616c740ecf5 in 
doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, 
doris::vectorized::Block*, int*) 
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:215:12
    #13 0x5616c7462e24 in 
doris::vectorized::VCompoundPred::execute(doris::vectorized::VExprContext*, 
doris::vectorized::Block*, int*) 
/root/doris/be/src/vec/exprs/vcompound_pred.h:127:38
    #14 0x5616c74bccec in 
doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*) 
/root/doris/be/src/vec/exprs/vexpr_context.cpp:54:5
    #15 0x5616c74c1dcc in 
doris::vectorized::VExprContext::execute_conjuncts(std::vector, 
std::allocator>> const&, std::vector, 16ul, 15ul>, std::allocator, 16ul, 
15ul>>> const*, bool, doris::vectorized::Block*, doris::vectorized::PODArray, 
16ul, 15ul>, bool) /root/doris/be/src/vec/exprs/vexpr_context.cpp:169:9
    #16 0x5616c74c5108 in 
doris::vectorized::VExprContext::execute_conjuncts_and_filter_block(std::vector,
 std::allocator>> const&, doris::vectorized::Block*, std::vector>&, int, 
doris::vectorized::PODArray, 16ul, 15ul>&) 
/root/doris/be/src/vec/exprs/vexpr_context.cpp:322:5
    #17 0x5616ad8a7f1a in 
doris::segment_v2::SegmentIterator::_execute_common_expr(unsigned short*, 
unsigned short&, doris::vectorized::Block*) 
/root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2680:5
    #18 0x5616ad89e86e in 
doris::segment_v2::SegmentIterator::_next_batch_internal(doris::vectorized::Block*)
 /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2582:25
    #19 0x5616ad892f5c in 
doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*)::$_0::operator()()
 const /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2315:9
    #20 0x5616ad892f5c in 
doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*) 
/root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2314:19
    #21 0x5616ad6dd9cc in 
doris::segment_v2::LazyInitSegmentIterator::next_batch(doris::vectorized::Block*)
 /root/doris/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.h:44:33
    #22 0x5616ad269d67 in 
doris::BetaRowsetReader::next_block(doris::vectorized::Block*) 
/root/doris/be/src/olap/rowset/beta_rowset_reader.cpp:380:29
    #23 0x5616de6de110 in 
doris::vectorized::VCollectIterator::Level0Iterator::_refresh() 
/root/doris/be/src/vec/olap/vcollect_iterator.h
    #24 0x5616de6c967f in 
doris::vectorized::VCollectIterator::Level0Iterator::refresh_current_row() 
/root/doris/be/src/vec/olap/vcollect_iterator.cpp:514:24
    #25 0x5616de6ca8a6 in 
doris::vectorized::VCollectIterator::Level0Iterator::ensure_first_row_ref() 
/root/doris/be/src/vec/olap/vcollect_iterator.cpp:493:14
    #26 0x5616de6d7008 in 
doris::vectorized::VCollectIterator::Level1Iterator::ensure_first_row_ref() 
/root/doris/be/src/vec/olap/vcollect_iterator.cpp:692:27
    #27 0x5616de6bd200 in 
doris::vectorized::VCollectIterator::build_heap(std::vector, std::allocator>>&) 
/root/doris/be/src/vec/olap/vcollect_iterator.cpp:186:9
    #28 0x5616de651b6c in 
doris::vectorized::BlockReader::_init_collect_iter(doris::TabletReader::ReaderParams
 const&) /root/doris/be/src/vec/olap/block_reader.cpp:157:5
    #29 0x5616de65526f in 
doris::vectorized::BlockReader::init(doris::TabletReader::ReaderParams const&) 
/root/doris/be/src/vec/olap/block_reader.cpp:229:19
    #30 0x5616e175a0f9 in 
doris::vectorized::NewOlapScanner::open(doris::RuntimeState*) 
/root/doris/be/src/vec/exec/scan/new_olap_scanner.cpp:237:32
    #31 0x5616c736ad34 in 
doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr, 
std::shared_ptr) /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:236:5
    #32 0x5616c736f05e in 
doris::vectorized::ScannerScheduler::submit(std::shared_ptr, 
std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() 
const::'lambda'()::operator()() const 
/root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:176:21
    #33 0x5616c736f05e in 
doris::vectorized::ScannerScheduler::submit(std::shared_ptr, 
std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() const 
/root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:175:31
    #34 0x5616c736f05e in void std::_invoke_impl, 
std::shared_ptr)::$_1::operator()() const::'lambda'()&>(std::_invoke_other, 
doris::vectorized::ScannerScheduler::submit(std::shared_ptr, 
std::shared_ptr)::$_1::operator()() const::'lambda'()&) 
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14
    #35 0x5616c736f05e in std::enable_if, std::shared_ptr)::$1::operator()() 
const::'lambda'()&>, void>::type std::_invoke_r, 
std::shared_ptr)::$_1::operator()() 
const::'lambda'()&>(doris::vectorized::ScannerScheduler::submit(std::shared_ptr,
 std::shared_ptr)::$_1::operator()() const::'lambda'()&) 
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111:2
    #36 0x5616c736f05e in std::_Function_handler, 
std::shared_ptr)::$_1::operator()() 
const::'lambda'()>::_M_invoke(std::_Any_data const&) 
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9
    #37 0x5616aeed6a3b in doris::ThreadPool::dispatch_thread() 
/root/doris/be/src/util/threadpool.cpp:543:24
    #38 0x5616aeeae4f7 in doris::Thread::supervise_thread(void*) 
/root/doris/be/src/util/thread.cpp:498:5
    #39 0x7f7e663e3ac2 in start_thread nptl/pthread_create.c:442:8
    #40 0x7f7e6647584f misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
    
    SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /root/doris/be/src/vec/common/string_ref.h:271:54 in
    ```
---
 be/src/exprs/hybrid_set.h                          | 35 ++++++++++++++++++++++
 be/src/vec/functions/in.h                          |  2 +-
 .../data/nereids_syntax_p0/inpredicate.out         |  9 ++++++
 .../suites/nereids_syntax_p0/inpredicate.groovy    | 16 ++++++++++
 4 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h
index b75cc81ebf1..f0977a652b1 100644
--- a/be/src/exprs/hybrid_set.h
+++ b/be/src/exprs/hybrid_set.h
@@ -17,7 +17,13 @@
 
 #pragma once
 
+#include <glog/logging.h>
+
+#include <type_traits>
+
+#include "common/exception.h"
 #include "common/object_pool.h"
+#include "common/status.h"
 #include "exprs/runtime_filter.h"
 #include "runtime/decimalv2_value.h"
 #include "runtime/define_primitive_type.h"
@@ -60,8 +66,16 @@ public:
         }
     }
 
+    void check_size() {
+        if (N != _size) {
+            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
+                                   "invalid size of FixedContainer<{}>: {}", N, _size);
+        }
+    }
+
     // Use '|' instead of '||' has better performance by test.
     ALWAYS_INLINE bool find(const T& value) const {
+        DCHECK_EQ(N, _size);
         if constexpr (N == 0) {
             return false;
         }
@@ -144,6 +158,12 @@ private:
     size_t _size {};
 };
 
+template <typename T>
+struct IsFixedContainer : std::false_type {};
+
+template <typename T, size_t N>
+struct IsFixedContainer<FixedContainer<T, N>> : std::true_type {};
+
 /**
  * Dynamic Container uses phmap::flat_hash_set.
  * @tparam T Element Type
@@ -354,6 +374,11 @@ public:
         if constexpr (is_nullable) {
             null_map_data = null_map->data();
         }
+
+        if constexpr (IsFixedContainer<ContainerType>::value) {
+            _set.check_size();
+        }
+
         auto* __restrict result_data = results.data();
         for (size_t i = 0; i < rows; ++i) {
             if constexpr (!is_nullable && !is_negative) {
@@ -507,6 +532,11 @@ public:
         if constexpr (is_nullable) {
             null_map_data = null_map->data();
         }
+
+        if constexpr (IsFixedContainer<ContainerType>::value) {
+            _set.check_size();
+        }
+
         auto* __restrict result_data = results.data();
         for (size_t i = 0; i < rows; ++i) {
             const auto& string_data = col.get_data_at(i).to_string();
@@ -675,6 +705,11 @@ public:
         if constexpr (is_nullable) {
             null_map_data = null_map->data();
         }
+
+        if constexpr (IsFixedContainer<ContainerType>::value) {
+            _set.check_size();
+        }
+
         auto* __restrict result_data = results.data();
         for (size_t i = 0; i < rows; ++i) {
             uint32_t len = offset[i] - offset[i - 1];
diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h
index b25ad8eeb67..9b5c5bb023a 100644
--- a/be/src/vec/functions/in.h
+++ b/be/src/vec/functions/in.h
@@ -114,7 +114,7 @@ public:
                    context->get_arg_type(0)->type == PrimitiveType::TYPE_VARCHAR ||
                    context->get_arg_type(0)->type == PrimitiveType::TYPE_STRING) {
             // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
-            state->hybrid_set.reset(create_string_value_set((size_t)(context->get_num_args() - 1)));
+            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context)));
         } else {
             state->hybrid_set.reset(
                     create_set(context->get_arg_type(0)->type, 
get_size_with_out_null(context)));
diff --git a/regression-test/data/nereids_syntax_p0/inpredicate.out b/regression-test/data/nereids_syntax_p0/inpredicate.out
index cee03178b5c..ac6219c69ce 100644
--- a/regression-test/data/nereids_syntax_p0/inpredicate.out
+++ b/regression-test/data/nereids_syntax_p0/inpredicate.out
@@ -31,3 +31,12 @@
 29     Supplier#000000029      VVSymB3fbwaN    ARGENTINA4      ARGENTINA       
AMERICA 11-773-203-7342
 9      Supplier#000000009      ,gJ6K2MKveYxQT  IRAN     6      IRAN    MIDDLE 
EAST     20-338-906-3675
 
+-- !in_predicate_11 --
+15     Supplier#000000015      DF35PepL5saAK   INDIA    0      INDIA   ASIA    
18-687-542-7601
+
+-- !in_predicate_12 --
+
+-- !in_predicate_13 --
+
+-- !in_predicate_14 --
+
diff --git a/regression-test/suites/nereids_syntax_p0/inpredicate.groovy b/regression-test/suites/nereids_syntax_p0/inpredicate.groovy
index 3cdf096519c..bf4ec9787f9 100644
--- a/regression-test/suites/nereids_syntax_p0/inpredicate.groovy
+++ b/regression-test/suites/nereids_syntax_p0/inpredicate.groovy
@@ -61,5 +61,21 @@ suite("inpredicate") {
     order_qt_in_predicate_10 """
         SELECT * FROM supplier WHERE s_suppkey not in (15);
     """
+
+    order_qt_in_predicate_11 """
+        SELECT * FROM supplier WHERE s_suppkey in (15, null);
+    """
+
+    order_qt_in_predicate_12 """
+        SELECT * FROM supplier WHERE s_suppkey not in (15, null);
+    """
+
+    order_qt_in_predicate_13 """
+        SELECT * FROM supplier WHERE s_nation in ('PERU', 'ETHIOPIA', null);
+    """
+
+    order_qt_in_predicate_14 """
+        SELECT * FROM supplier WHERE s_nation not in ('PERU', 'ETHIOPIA', null);
+    """
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to