This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b765be18488c9907f62956a43ddcabd95e4d5cab
Author: hui lai <1353307...@qq.com>
AuthorDate: Thu Jun 13 22:01:09 2024 +0800

    [fix](multi-table-load) fix BE core dump when multi-table load pipe fails to finish (#36269)
    
    ## Proposed changes
    
    ```
    *** Current BE git commitID: 5a8ea3079d ***
    *** SIGSEGV address not mapped to object (@0x18) received by PID 3726857 
(TID 3727585 OR 0x7f0129e83700) from PID 24; stack trace: ***
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/mnt/disk2/xujianxu/doris/be/src/common/signal_handler.h:421
     1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in 
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
     2# JVM_handle_linux_signal in 
/usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so
     3# 0x00007F01D9E87090 in /lib/x86_64-linux-gnu/libc.so.6
     4# 
std::_Function_handler<std::unique_ptr<std::__future_base::_Result_base, 
std::__future_base::_Result_base::_Deleter> (), 
std::__future_base::_State_baseV2::_Setter<doris::Status, doris::Status const&> 
>::_M_invoke(std::_Any_data const&) at 
/mnt/disk2/xujianxu/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290
     5# 
std::__future_base::_State_baseV2::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base,
 std::__future_base::_Result_base::_Deleter> ()>*, bool*) at 
/mnt/disk2/xujianxu/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/future:593
     6# __pthread_once_slow at 
/build/glibc-SzIz7B/glibc-2.31/nptl/pthread_once.c:118
     7# 
std::__future_base::_State_baseV2::_M_set_result(std::function<std::unique_ptr<std::__future_base::_Result_base,
 std::__future_base::_Result_base::_Deleter> ()>, bool) at 
/mnt/disk2/xujianxu/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/future:428
     8# doris::io::MultiTablePipe::_handle_consumer_finished() at 
/mnt/disk2/xujianxu/doris/be/src/io/fs/multi_table_pipe.cpp:334
     9# 
doris::io::MultiTablePipe::exec_plans<doris::TPipelineFragmentParams>(doris::ExecEnv*,
 std::vector<doris::TPipelineFragmentParams, 
std::allocator<doris::TPipelineFragmentParams> 
>)::{lambda(doris::RuntimeState*, 
doris::Status*)#1}::operator()(doris::RuntimeState*, doris::Status*) const at 
/mnt/disk2/xujianxu/doris/be/src/io/fs/multi_table_pipe.cpp:253
    10# doris::pipeline::PipelineFragmentContext::~PipelineFragmentContext() at 
/mnt/disk2/xujianxu/doris/be/src/pipeline/pipeline_fragment_context.cpp:131
    11# 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release_last_use_cold() 
at 
/mnt/disk2/xujianxu/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/shared_ptr_base.h:199
    12# doris::pipeline::_close_task(doris::pipeline::PipelineTask*, 
doris::Status) at 
/mnt/disk2/xujianxu/doris/be/src/pipeline/task_scheduler.cpp:95
    13# doris::pipeline::TaskScheduler::_do_work(unsigned long) at 
/mnt/disk2/xujianxu/doris/be/src/pipeline/task_scheduler.cpp:168
    14# doris::ThreadPool::dispatch_thread() in 
/mnt/hdd01/STRESS_ENV/be/lib/doris_be
    15# doris::Thread::supervise_thread(void*) at 
/mnt/disk2/xujianxu/doris/be/src/util/thread.cpp:499
    16# start_thread at /build/glibc-SzIz7B/glibc-2.31/nptl/pthread_create.c:478
    17# __clone at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97
    ```
    
    The BE crashes with a core dump when the multi-table load pipe fails to
    finish. If finish() fails, exec_task returns early, which causes ctx to be
    destructed before all tables have finished loading.
    Wait for all tables to finish before returning to solve this problem.
---
 be/src/runtime/routine_load/routine_load_task_executor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/be/src/runtime/routine_load/routine_load_task_executor.cpp 
b/be/src/runtime/routine_load/routine_load_task_executor.cpp
index 292f45ec030..958ad37f8d2 100644
--- a/be/src/runtime/routine_load/routine_load_task_executor.cpp
+++ b/be/src/runtime/routine_load/routine_load_task_executor.cpp
@@ -408,7 +408,7 @@ void 
RoutineLoadTaskExecutor::exec_task(std::shared_ptr<StreamLoadContext> ctx,
         }
         // need memory order
         multi_table_pipe->handle_consume_finished();
-        HANDLE_ERROR(kafka_pipe->finish(), "finish multi table task failed");
+        HANDLE_MULTI_TABLE_ERROR(kafka_pipe->finish(), "finish multi table 
task failed");
     } else {
         // start to consume, this may block a while
         HANDLE_ERROR(consumer_grp->start_all(ctx, kafka_pipe), "consuming 
failed");


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to