This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 134e86e2ecf [fix](function) Undefined behavior in parse_url (#49149) 134e86e2ecf is described below commit 134e86e2ecfb705c3e7b92d613b3dafa92d3a40b Author: Jerry Hu <hushengg...@selectdb.com> AuthorDate: Tue Mar 18 15:25:24 2025 +0800 [fix](function) Undefined behavior in parse_url (#49149) ### What problem does this PR solve? ``` /root/doris/be/src/vec/common/pod_array.h:510:29: runtime error: null pointer passed as argument 2, which is declared to never be null /root/ldb_toolchain/bin/../usr/include/string.h:43:28: note: nonnull attribute specified here #0 0x55cb2c2cea1e in void doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul, 15ul>::insert_assume_reserved<char const*, char const*>(char const*, char const*) /root/doris/be/src/vec/common/pod_array.h:510:9 #1 0x55cb2c2ce8a7 in void doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul, 15ul>::insert<char const*, char const*>(char const*, char const*) /root/doris/be/src/vec/common/pod_array.h:472:9 #2 0x55cb5a0b0d50 in doris::vectorized::StringOP::push_value_string(std::basic_string_view<char, std::char_traits<char>> const&, unsigned long, doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul, 15ul>&, doris::vectorized::PODArray<unsigned int, 4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul, 15ul>&) /root/doris/be/src/vec/functions/function_string.h:128:15 #3 0x55cb5d6843a2 in doris::Status doris::vectorized::FunctionStringParseUrl::vector_parse<false, true>(doris::vectorized::ColumnStr<unsigned int> const*, std::vector<doris::UrlParser::UrlPart, std::allocator<doris::UrlParser::UrlPart>>&, int, doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul, 15ul>&, 
doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul, 15ul>&, [...] #4 0x55cb5d683e9d in auto doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto)::operator()<std::integral_constant<bool, false>, std::integral_constant<bool, true>>(auto, auto) const /root/doris/be/src/vec/functions/function_string.h:2783:13 #5 0x55cb5d683c39 in auto std::__invoke_impl<doris::Status, doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto), std::integral_constant<bool, false>, std::integral_constant<bool, true>>(std::__invoke_other, auto&&, std::integral_constant<bool, false>&&, std::integral_constant<bool, true>&&) /root/ldb_toolch [...] #6 0x55cb5d683af5 in std::__invoke_result<auto, std::integral_constant<bool, false>, std::integral_constant<bool, true>>::type std::__invoke<doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto), std::integral_constant<bool, false>, std::integral_constant<bool, true>>(auto&&, std::integral_constant<bool, fals [...] 
#7 0x55cb5d682dd3 in std::__detail::__variant::__gen_vtable_impl<std::__detail::__variant::_Multi_array<std::__detail::__variant::__deduce_visit_result<doris::Status> (*)(doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto)&&, std::variant<std::integral_constant<bool, false>, std::integral_constant<bool, tru [...] #8 0x55cb5d682ad5 in decltype(auto) std::__do_visit<std::__detail::__variant::__deduce_visit_result<doris::Status>, doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto), std::variant<std::integral_constant<bool, false>, std::integral_constant<bool, true>>, std::variant<std::integral_constant<bool, false>, st [...] #9 0x55cb5d67766b in std::invoke_result<auto, std::__conditional<is_lvalue_reference_v<std::variant<std::integral_constant<bool, false>, std::integral_constant<bool, true>>>>::type<std::variant_alternative<0ul, std::remove_reference<decltype(__variant::__as(std::declval<std::variant<std::integral_constant<bool, false>, std::integral_constant<bool, true>>>()))>::type>::type&, std::variant_alternative<0ul, std::remove_reference<decltype(__variant::__as(std::declval<std::variant<std: [...] 
#10 0x55cb5d676604 in doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long) const /root/doris/be/src/vec/functions/function_string.h:2783:13 #11 0x55cb51a43fd4 in doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long) const /root/doris/be/src/vec/functions/function.h:434:26 #12 0x55cb583f7dff in doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:119:16 #13 0x55cb583e6de9 in doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:244:12 #14 0x55cb583e5f53 in doris::vectorized::PreparedFunctionImpl::default_implementation_for_nulls(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long, bool, bool*) const /root/doris/be/src/vec/functions/function.cpp:216:9 #15 0x55cb583f7939 in doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:110:5 #16 0x55cb583e6de9 in doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:244:12 #17 0x55cb583e7069 in doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:250:12 #18 0x55cb51a3fd95 in doris::vectorized::IFunctionBase::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long, bool) const /root/doris/be/src/vec/functions/function.h:193:19 #19 0x55cb51a2797f in doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*, std::vector<unsigned int, std::allocator<unsigned int>>&) /root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:187:5 #20 0x55cb51a28a77 in doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:220:12 #21 0x55cb51b5f3b4 in doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vexpr_context.cpp:61:5 #22 0x55cb519a6732 in doris::vectorized::Scanner::_do_projections(doris::vectorized::Block*, doris::vectorized::Block*) /root/doris/be/src/vec/exec/scan/scanner.cpp:200:9 #23 0x55cb519a1b13 in doris::vectorized::Scanner::get_block_after_projects(doris::RuntimeState*, doris::vectorized::Block*, bool*) /root/doris/be/src/vec/exec/scan/scanner.cpp:82:16 #24 0x55cb5192a59d in doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>) /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:241:5 #25 0x55cb51931c38 in doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, 
std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::'lambda'()::operator()() const::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:148:21 #26 0x55cb519314ef in doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:147:31 #27 0x55cb519312fe in void std::__invoke_impl<void, doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::'lambda'()&>(std::__invoke_other, doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::'lambda'()&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gn [...] #28 0x55cb5193123e in std::enable_if<is_invocable_r_v<void, doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::'lambda'()&>, void>::type std::__invoke_r<void, doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::'lambda'()&>(doris::vectorized::ScannerSc [...] 
#29 0x55cb51930e45 in std::_Function_handler<void (), doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::'lambda'()>::_M_invoke(std::_Any_data const&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9 #30 0x55cb216f8e3f in std::function<void ()>::operator()() const /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9 #31 0x55cb51940ec6 in doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.h:149:65 #32 0x55cb51940e7e in void std::__invoke_impl<void, doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&>(std::__invoke_other, doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14 #33 0x55cb51940dbe in std::enable_if<is_invocable_r_v<void, doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&>, void>::type std::__invoke_r<void, doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&>(doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux- [...] 
#34 0x55cb51940745 in std::_Function_handler<void (), doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()>::_M_invoke(std::_Any_data const&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9 #35 0x55cb216f8e3f in std::function<void ()>::operator()() const /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9 #36 0x55cb28bd9844 in doris::FunctionRunnable::run() /root/doris/be/src/util/threadpool.cpp:64:27 #37 0x55cb28bb84b9 in doris::ThreadPool::dispatch_thread() /root/doris/be/src/util/threadpool.cpp:616:24 #38 0x55cb28bfd263 in void std::__invoke_impl<void, void (doris::ThreadPool::*&)(), doris::ThreadPool*&>(std::__invoke_memfun_deref, void (doris::ThreadPool::*&)(), doris::ThreadPool*&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:74:14 #39 0x55cb28bfd068 in std::__invoke_result<void (doris::ThreadPool::*&)(), doris::ThreadPool*&>::type std::__invoke<void (doris::ThreadPool::*&)(), doris::ThreadPool*&>(void (doris::ThreadPool::*&)(), doris::ThreadPool*&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:96:14 #40 0x55cb28bfcfa0 in void std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>::__call<void, 0ul>(std::tuple<>&&, std::_Index_tuple<0ul>) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:506:11 #41 0x55cb28bfcd95 in void std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>::operator()<void>() /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:591:17 #42 0x55cb28bfcc8e in void std::__invoke_impl<void, std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&>(std::__invoke_other, std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&) 
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14 #43 0x55cb28bfcbce in std::enable_if<is_invocable_r_v<void, std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&>, void>::type std::__invoke_r<void, std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&>(std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:111:2 #44 0x55cb28bfc665 in std::_Function_handler<void (), std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>>::_M_invoke(std::_Any_data const&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9 #45 0x55cb216f8e3f in std::function<void ()>::operator()() const /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9 #46 0x55cb28b74241 in doris::Thread::supervise_thread(void*) /root/doris/be/src/util/thread.cpp:498:5 #47 0x55cb2142be0a in asan_thread_start(void*) crtstuff.c #48 0x7f17840221c9 in start_thread (/lib64/libpthread.so.0+0x81c9) (BuildId: 7c4add5c7a885e6ff4ce17867d6a2286e4420eec) #49 0x7f1784a118d2 in clone (/lib64/libc.so.6+0x398d2) (BuildId: 4ee3325955e3b55b6805f33959b7cb77745ad625) --- be/src/vec/functions/function_string.h | 6 +++++- be/test/vec/function/function_string_test.cpp | 3 ++- .../data/function_p0/test_function_string.out | Bin 121 -> 188 bytes .../suites/function_p0/test_function_string.groovy | 24 +++++++++++++++++++++ 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 3b909f4a8d5..5b37cc44c5d 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -122,6 +122,7 @@ struct StringOP { static void push_value_string(const std::string_view& string_value, size_t index, ColumnString::Chars& chars, 
ColumnString::Offsets& offsets) { + DCHECK(string_value.data() != nullptr); ColumnString::check_chars_length(chars.size() + string_value.size(), offsets.size()); chars.insert(string_value.data(), string_value.data() + string_value.size()); @@ -2802,11 +2803,14 @@ public: StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); StringRef parse_res; if (UrlParser::parse_url(url_val, url_part, &parse_res)) { + if (parse_res.empty()) [[unlikely]] { + StringOP::push_empty_string(i, res_chars, res_offsets); + continue; + } StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, res_chars, res_offsets); } else { StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); - continue; } } return Status::OK(); diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp index 6dc0e4ba42f..2a0326361fd 100644 --- a/be/test/vec/function/function_string_test.cpp +++ b/be/test/vec/function/function_string_test.cpp @@ -2284,7 +2284,8 @@ TEST(function_string_test, function_parse_url_test) { {{std::string( "https://www.facebook.com/aa/bb?returnpage=https://www.facebook.com/"), std::string("HosT")}, - std::string("www.facebook.com")}}; + std::string("www.facebook.com")}, + {{std::string("http://www.baidu.com"), std::string("FILE")}, {std::string("")}}}; check_function_all_arg_comb<DataTypeString, true>(func_name, input_types, data_set); } diff --git a/regression-test/data/function_p0/test_function_string.out b/regression-test/data/function_p0/test_function_string.out index 226d3e675f3..6524bb82fc0 100644 Binary files a/regression-test/data/function_p0/test_function_string.out and b/regression-test/data/function_p0/test_function_string.out differ diff --git a/regression-test/suites/function_p0/test_function_string.groovy b/regression-test/suites/function_p0/test_function_string.groovy index 5aa46fb6c52..28e4d832336 100644 --- 
a/regression-test/suites/function_p0/test_function_string.groovy +++ b/regression-test/suites/function_p0/test_function_string.groovy @@ -47,4 +47,28 @@ suite("test_function_string") { drop table if exists test_tb_function_space; """ + + sql """ + drop table if exists test_parse_url; + """ + + sql """ + CREATE TABLE `test_parse_url` ( + `id` int NULL, + `url` text NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY RANDOM BUCKETS AUTO + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + insert into test_parse_url values (1, 'http://www.facebook.com'), (2, "http://www.google.com/test?name=abc&age=20"); + """ + + qt_sql """ + select parse_url(url, 'HOST') as host, parse_url(url, 'FILE') as file from test_parse_url order by id; + """ } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org