This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 134e86e2ecf [fix](function) Undefined behavior in parse_url (#49149)
134e86e2ecf is described below

commit 134e86e2ecfb705c3e7b92d613b3dafa92d3a40b
Author: Jerry Hu <hushengg...@selectdb.com>
AuthorDate: Tue Mar 18 15:25:24 2025 +0800

    [fix](function) Undefined behavior in parse_url (#49149)
    
    ### What problem does this PR solve?
    
    ```
    /root/doris/be/src/vec/common/pod_array.h:510:29: runtime error: null pointer passed as argument 2, which is declared to never be null
    /root/ldb_toolchain/bin/../usr/include/string.h:43:28: note: nonnull attribute specified here
        #0 0x55cb2c2cea1e in void doris::vectorized::PODArray<unsigned char, 
4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul, 
15ul>::insert_assume_reserved<char const*, char const*>(char const*, char 
const*) /root/doris/be/src/vec/common/pod_array.h:510:9
        #1 0x55cb2c2ce8a7 in void doris::vectorized::PODArray<unsigned char, 
4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul, 
15ul>::insert<char const*, char const*>(char const*, char const*) 
/root/doris/be/src/vec/common/pod_array.h:472:9
        #2 0x55cb5a0b0d50 in 
doris::vectorized::StringOP::push_value_string(std::basic_string_view<char, 
std::char_traits<char>> const&, unsigned long, 
doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, 
false, DefaultMemoryAllocator>, 16ul, 15ul>&, 
doris::vectorized::PODArray<unsigned int, 4096ul, Allocator<false, false, 
false, DefaultMemoryAllocator>, 16ul, 15ul>&) 
/root/doris/be/src/vec/functions/function_string.h:128:15
        #3 0x55cb5d6843a2 in doris::Status 
doris::vectorized::FunctionStringParseUrl::vector_parse<false, 
true>(doris::vectorized::ColumnStr<unsigned int> const*, 
std::vector<doris::UrlParser::UrlPart, 
std::allocator<doris::UrlParser::UrlPart>>&, int, 
doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, 
false, DefaultMemoryAllocator>, 16ul, 15ul>&, 
doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, 
false, DefaultMemoryAllocator>, 16ul, 15ul>&, [...]
        #4 0x55cb5d683e9d in auto 
doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long) const::'lambda'(auto, 
auto)::operator()<std::integral_constant<bool, false>, 
std::integral_constant<bool, true>>(auto, auto) const 
/root/doris/be/src/vec/functions/function_string.h:2783:13
        #5 0x55cb5d683c39 in auto std::__invoke_impl<doris::Status, 
doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto), 
std::integral_constant<bool, false>, std::integral_constant<bool, 
true>>(std::__invoke_other, auto&&, std::integral_constant<bool, false>&&, 
std::integral_constant<bool, true>&&) /root/ldb_toolch [...]
        #6 0x55cb5d683af5 in std::__invoke_result<auto, 
std::integral_constant<bool, false>, std::integral_constant<bool, true>>::type 
std::__invoke<doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto), 
std::integral_constant<bool, false>, std::integral_constant<bool, 
true>>(auto&&, std::integral_constant<bool, fals [...]
        #7 0x55cb5d682dd3 in 
std::__detail::__variant::__gen_vtable_impl<std::__detail::__variant::_Multi_array<std::__detail::__variant::__deduce_visit_result<doris::Status>
 
(*)(doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto)&&, 
std::variant<std::integral_constant<bool, false>, std::integral_constant<bool, 
tru [...]
        #8 0x55cb5d682ad5 in decltype(auto) 
std::__do_visit<std::__detail::__variant::__deduce_visit_result<doris::Status>, 
doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto), 
std::variant<std::integral_constant<bool, false>, std::integral_constant<bool, 
true>>, std::variant<std::integral_constant<bool, false>, st [...]
        #9 0x55cb5d67766b in std::invoke_result<auto, 
std::__conditional<is_lvalue_reference_v<std::variant<std::integral_constant<bool,
 false>, std::integral_constant<bool, 
true>>>>::type<std::variant_alternative<0ul, 
std::remove_reference<decltype(__variant::__as(std::declval<std::variant<std::integral_constant<bool,
 false>, std::integral_constant<bool, true>>>()))>::type>::type&, 
std::variant_alternative<0ul, 
std::remove_reference<decltype(__variant::__as(std::declval<std::variant<std: 
[...]
        #10 0x55cb5d676604 in 
doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long) const 
/root/doris/be/src/vec/functions/function_string.h:2783:13
        #11 0x55cb51a43fd4 in 
doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long) const 
/root/doris/be/src/vec/functions/function.h:434:26
        #12 0x55cb583f7dff in 
doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long, bool) const 
/root/doris/be/src/vec/functions/function.cpp:119:16
        #13 0x55cb583e6de9 in 
doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long, bool) const 
/root/doris/be/src/vec/functions/function.cpp:244:12
        #14 0x55cb583e5f53 in 
doris::vectorized::PreparedFunctionImpl::default_implementation_for_nulls(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long, bool, bool*) const 
/root/doris/be/src/vec/functions/function.cpp:216:9
        #15 0x55cb583f7939 in 
doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long, bool) const 
/root/doris/be/src/vec/functions/function.cpp:110:5
        #16 0x55cb583e6de9 in 
doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long, bool) const 
/root/doris/be/src/vec/functions/function.cpp:244:12
        #17 0x55cb583e7069 in 
doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long, bool) const 
/root/doris/be/src/vec/functions/function.cpp:250:12
        #18 0x55cb51a3fd95 in 
doris::vectorized::IFunctionBase::execute(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long, bool) const 
/root/doris/be/src/vec/functions/function.h:193:19
        #19 0x55cb51a2797f in 
doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*,
 doris::vectorized::Block*, int*, std::vector<unsigned int, 
std::allocator<unsigned int>>&) 
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:187:5
        #20 0x55cb51a28a77 in 
doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, 
doris::vectorized::Block*, int*) 
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:220:12
        #21 0x55cb51b5f3b4 in 
doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*) 
/root/doris/be/src/vec/exprs/vexpr_context.cpp:61:5
        #22 0x55cb519a6732 in 
doris::vectorized::Scanner::_do_projections(doris::vectorized::Block*, 
doris::vectorized::Block*) /root/doris/be/src/vec/exec/scan/scanner.cpp:200:9
        #23 0x55cb519a1b13 in 
doris::vectorized::Scanner::get_block_after_projects(doris::RuntimeState*, 
doris::vectorized::Block*, bool*) 
/root/doris/be/src/vec/exec/scan/scanner.cpp:82:16
        #24 0x55cb5192a59d in 
doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr<doris::vectorized::ScannerContext>,
 std::shared_ptr<doris::vectorized::ScanTask>) 
/root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:241:5
        #25 0x55cb51931c38 in 
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
 std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() 
const::'lambda'()::operator()() const::'lambda'()::operator()() const 
/root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:148:21
        #26 0x55cb519314ef in 
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
 std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() 
const::'lambda'()::operator()() const 
/root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:147:31
        #27 0x55cb519312fe in void std::__invoke_impl<void, 
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
 std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() 
const::'lambda'()&>(std::__invoke_other, 
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
 std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() 
const::'lambda'()&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gn [...]
        #28 0x55cb5193123e in std::enable_if<is_invocable_r_v<void, 
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
 std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() 
const::'lambda'()&>, void>::type std::__invoke_r<void, 
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
 std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() 
const::'lambda'()&>(doris::vectorized::ScannerSc [...]
        #29 0x55cb51930e45 in std::_Function_handler<void (), 
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
 std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() 
const::'lambda'()>::_M_invoke(std::_Any_data const&) 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9
        #30 0x55cb216f8e3f in std::function<void ()>::operator()() const 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
        #31 0x55cb51940ec6 in 
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()::operator()()
 const /root/doris/be/src/vec/exec/scan/scanner_scheduler.h:149:65
        #32 0x55cb51940e7e in void std::__invoke_impl<void, 
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&>(std::__invoke_other,
 
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&)
 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14
        #33 0x55cb51940dbe in std::enable_if<is_invocable_r_v<void, 
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&>,
 void>::type std::__invoke_r<void, 
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&>(doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&)
 /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux- [...]
        #34 0x55cb51940745 in std::_Function_handler<void (), 
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()>::_M_invoke(std::_Any_data
 const&) 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9
        #35 0x55cb216f8e3f in std::function<void ()>::operator()() const 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
        #36 0x55cb28bd9844 in doris::FunctionRunnable::run() 
/root/doris/be/src/util/threadpool.cpp:64:27
        #37 0x55cb28bb84b9 in doris::ThreadPool::dispatch_thread() 
/root/doris/be/src/util/threadpool.cpp:616:24
        #38 0x55cb28bfd263 in void std::__invoke_impl<void, void 
(doris::ThreadPool::*&)(), doris::ThreadPool*&>(std::__invoke_memfun_deref, 
void (doris::ThreadPool::*&)(), doris::ThreadPool*&) 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:74:14
        #39 0x55cb28bfd068 in std::__invoke_result<void 
(doris::ThreadPool::*&)(), doris::ThreadPool*&>::type std::__invoke<void 
(doris::ThreadPool::*&)(), doris::ThreadPool*&>(void (doris::ThreadPool::*&)(), 
doris::ThreadPool*&) 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:96:14
        #40 0x55cb28bfcfa0 in void std::_Bind<void (doris::ThreadPool::* 
(doris::ThreadPool*))()>::__call<void, 0ul>(std::tuple<>&&, 
std::_Index_tuple<0ul>) 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:506:11
        #41 0x55cb28bfcd95 in void std::_Bind<void (doris::ThreadPool::* 
(doris::ThreadPool*))()>::operator()<void>() 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:591:17
        #42 0x55cb28bfcc8e in void std::__invoke_impl<void, std::_Bind<void 
(doris::ThreadPool::* (doris::ThreadPool*))()>&>(std::__invoke_other, 
std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&) 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14
        #43 0x55cb28bfcbce in std::enable_if<is_invocable_r_v<void, 
std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&>, void>::type 
std::__invoke_r<void, std::_Bind<void (doris::ThreadPool::* 
(doris::ThreadPool*))()>&>(std::_Bind<void (doris::ThreadPool::* 
(doris::ThreadPool*))()>&) 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:111:2
        #44 0x55cb28bfc665 in std::_Function_handler<void (), std::_Bind<void 
(doris::ThreadPool::* (doris::ThreadPool*))()>>::_M_invoke(std::_Any_data 
const&) 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9
        #45 0x55cb216f8e3f in std::function<void ()>::operator()() const 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
        #46 0x55cb28b74241 in doris::Thread::supervise_thread(void*) 
/root/doris/be/src/util/thread.cpp:498:5
        #47 0x55cb2142be0a in asan_thread_start(void*) crtstuff.c
        #48 0x7f17840221c9 in start_thread (/lib64/libpthread.so.0+0x81c9) 
(BuildId: 7c4add5c7a885e6ff4ce17867d6a2286e4420eec)
        #49 0x7f1784a118d2 in clone (/lib64/libc.so.6+0x398d2) (BuildId: 4ee3325955e3b55b6805f33959b7cb77745ad625)
    ```
---
 be/src/vec/functions/function_string.h             |   6 +++++-
 be/test/vec/function/function_string_test.cpp      |   3 ++-
 .../data/function_p0/test_function_string.out      | Bin 121 -> 188 bytes
 .../suites/function_p0/test_function_string.groovy |  24 +++++++++++++++++++++
 4 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index 3b909f4a8d5..5b37cc44c5d 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -122,6 +122,7 @@ struct StringOP {
 
     static void push_value_string(const std::string_view& string_value, size_t 
index,
                                   ColumnString::Chars& chars, 
ColumnString::Offsets& offsets) {
+        DCHECK(string_value.data() != nullptr);
         ColumnString::check_chars_length(chars.size() + string_value.size(), 
offsets.size());
 
         chars.insert(string_value.data(), string_value.data() + 
string_value.size());
@@ -2802,11 +2803,14 @@ public:
             StringRef url_val = 
url_col->get_data_at(index_check_const<url_const>(i));
             StringRef parse_res;
             if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
+                if (parse_res.empty()) [[unlikely]] {
+                    StringOP::push_empty_string(i, res_chars, res_offsets);
+                    continue;
+                }
                 StringOP::push_value_string(std::string_view(parse_res.data, 
parse_res.size), i,
                                             res_chars, res_offsets);
             } else {
                 StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
-                continue;
             }
         }
         return Status::OK();
diff --git a/be/test/vec/function/function_string_test.cpp 
b/be/test/vec/function/function_string_test.cpp
index 6dc0e4ba42f..2a0326361fd 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -2284,7 +2284,8 @@ TEST(function_string_test, function_parse_url_test) {
                 {{std::string(
                           "https://www.facebook.com/aa/bb?returnpage=https://www.facebook.com/"),
                   std::string("HosT")},
-                 std::string("www.facebook.com")}};
+                 std::string("www.facebook.com")},
+                {{std::string("http://www.baidu.com"), std::string("FILE")}, {std::string("")}}};
 
         check_function_all_arg_comb<DataTypeString, true>(func_name, 
input_types, data_set);
     }
diff --git a/regression-test/data/function_p0/test_function_string.out 
b/regression-test/data/function_p0/test_function_string.out
index 226d3e675f3..6524bb82fc0 100644
Binary files a/regression-test/data/function_p0/test_function_string.out and 
b/regression-test/data/function_p0/test_function_string.out differ
diff --git a/regression-test/suites/function_p0/test_function_string.groovy 
b/regression-test/suites/function_p0/test_function_string.groovy
index 5aa46fb6c52..28e4d832336 100644
--- a/regression-test/suites/function_p0/test_function_string.groovy
+++ b/regression-test/suites/function_p0/test_function_string.groovy
@@ -47,4 +47,28 @@ suite("test_function_string") {
         drop table if exists test_tb_function_space;
     """
 
+
+    sql """
+        drop table if exists test_parse_url;
+    """
+
+    sql """
+     CREATE TABLE `test_parse_url` (
+        `id` int NULL,
+        `url` text NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY RANDOM BUCKETS AUTO
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    sql """
+        insert into test_parse_url values (1, 'http://www.facebook.com'), (2, "http://www.google.com/test?name=abc&age=20");
+    """
+
+    qt_sql """
+        select parse_url(url, 'HOST') as host, parse_url(url, 'FILE') as file 
from test_parse_url order by id;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to