This is an automated email from the ASF dual-hosted git repository.

zouxinyi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d9d45198ea4 [fix](memory) Fix `SwitchResourceContext` check attach task (#48429)
d9d45198ea4 is described below

commit d9d45198ea462682c68eab864d79094f1177ab3a
Author: Xinyi Zou <zouxi...@selectdb.com>
AuthorDate: Mon Mar 3 18:26:29 2025 +0800

    [fix](memory) Fix `SwitchResourceContext` check attach task (#48429)
    
    ### What problem does this PR solve?
    
    Fix: #47462
    
    `SwitchResourceContext` no longer checks whether `SCOPED_ATTACH_TASK`
    has already been called, which makes `SwitchResourceContext` more
    flexible.
    For example, `MemTableWriter::flush_async` now works correctly whether
    or not its caller has attached a task.
    
    This fixes the following check failure:
    ```
    F 2025-02-27 02:20:55,511 11069 thread_context.h:201] Check failed: is_attach_task()
    *** Check failure stack trace: ***
        @     0x558647d61466  google::LogMessage::SendToLog()
        @     0x558647d5deb0  google::LogMessage::Flush()
        @     0x558647d61ca9  google::LogMessageFatal::~LogMessageFatal()
        @     0x55863da71801  
doris::SwitchResourceContext::SwitchResourceContext()
        @     0x55863d3b90f9  doris::MemTableWriter::flush_async()
        @     0x55863d3b0d60  
doris::MemTableMemoryLimiter::_flush_active_memtables()
        @     0x55863d3b1217  
doris::MemTableMemoryLimiter::flush_workload_group_memtables()
        @     0x55863da900dc  
doris::WorkloadGroupMgr::flush_memtable_from_current_group_()
        @     0x55863da8d1be  doris::WorkloadGroupMgr::handle_paused_queries()
        @     0x55863cb74f65  doris::Daemon::memory_maintenance_thread()
        @     0x55863dc1c631  doris::Thread::supervise_thread()
        @     0x7f60f7117ac3  (unknown)
        @     0x7f60f71a9850  (unknown)
        @              (nil)  (unknown)
    *** Query id: 0-0 ***
    *** is nereids: 0 ***
    *** tablet id: 0 ***
    *** Aborted at 1740594055 (unix time) try "date -d @1740594055" if you are 
using GNU date ***
    *** Current BE git commitID: e0d85882bd ***
    *** SIGABRT unknown detail explain (@0x22fc) received by PID 8956 (TID 
11069 OR 0x7f5d09c04640) from PID 8956; stack trace: ***
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:421
     1# 0x00007F60F70C5520 in /lib/x86_64-linux-gnu/libc.so.6
     2# pthread_kill at ./nptl/pthread_kill.c:89
     3# raise at ../sysdeps/posix/raise.c:27
     4# abort at ./stdlib/abort.c:81
     5# 0x0000558647D6BD3D in 
/mnt/hdd01/ci/doris-deploy-master-local/be/lib/doris_be
     6# 0x0000558647D5E37A in 
/mnt/hdd01/ci/doris-deploy-master-local/be/lib/doris_be
     7# google::LogMessage::SendToLog() in 
/mnt/hdd01/ci/doris-deploy-master-local/be/lib/doris_be
     8# google::LogMessage::Flush() in 
/mnt/hdd01/ci/doris-deploy-master-local/be/lib/doris_be
     9# google::LogMessageFatal::~LogMessageFatal() in 
/mnt/hdd01/ci/doris-deploy-master-local/be/lib/doris_be
    10# 
doris::SwitchResourceContext::SwitchResourceContext(std::shared_ptr<doris::ResourceContext>
 const&) in /mnt/hdd01/ci/doris-deploy-master-local/be/lib/doris_be
    11# doris::MemTableWriter::flush_async() at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/memtable_writer.cpp:159
    12# doris::MemTableMemoryLimiter::_flush_active_memtables(unsigned long, 
long) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/memtable_memory_limiter.cpp:262
    13# doris::MemTableMemoryLimiter::flush_workload_group_memtables(unsigned 
long, long) in /mnt/hdd01/ci/doris-deploy-master-local/be/lib/doris_be
    14# 
doris::WorkloadGroupMgr::flush_memtable_from_current_group_(std::shared_ptr<doris::WorkloadGroup>,
 long) at 
/home/zcp/repo_center/doris_master/doris/be/src/runtime/workload_group/workload_group_manager.cpp:568
    15# doris::WorkloadGroupMgr::handle_paused_queries() in 
/mnt/hdd01/ci/doris-deploy-master-local/be/lib/doris_be
    16# doris::Daemon::memory_maintenance_thread() at 
/home/zcp/repo_center/doris_master/doris/be/src/common/daemon.cpp:329
    17# doris::Thread::supervise_thread(void*) at 
/home/zcp/repo_center/doris_master/doris/be/src/util/thread.cpp:499
    18# start_thread at ./nptl/pthread_create.c:442
    19# 0x00007F60F71A9850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
    ```
---
 be/src/olap/memtable_writer.cpp     | 3 ++-
 be/src/runtime/load_channel.cpp     | 1 +
 be/src/runtime/load_channel.h       | 2 --
 be/src/runtime/load_channel_mgr.cpp | 1 -
 be/src/runtime/thread_context.cpp   | 1 -
 5 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/memtable_writer.cpp b/be/src/olap/memtable_writer.cpp
index 1a0e1e99b69..25efc82612b 100644
--- a/be/src/olap/memtable_writer.cpp
+++ b/be/src/olap/memtable_writer.cpp
@@ -152,9 +152,10 @@ Status MemTableWriter::_flush_memtable_async() {
 
 Status MemTableWriter::flush_async() {
     std::lock_guard<std::mutex> l(_lock);
-    // Two calling paths:
+    // Three calling paths:
     // 1. call by local, from `VTabletWriterV2::_write_memtable`.
     // 2. call by remote, from `LoadChannelMgr::_get_load_channel`.
+    // 3. call by daemon thread, from `handle_paused_queries` -> 
`flush_workload_group_memtables`.
     SCOPED_SWITCH_RESOURCE_CONTEXT(_resource_ctx);
     if (!_is_init || _is_closed) {
         // This writer is uninitialized or closed before flushing, do nothing.
diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp
index ce10666d84c..d745132e589 100644
--- a/be/src/runtime/load_channel.cpp
+++ b/be/src/runtime/load_channel.cpp
@@ -174,6 +174,7 @@ Status LoadChannel::add_batch(const 
PTabletWriterAddBlockRequest& request,
                               PTabletWriterAddBlockResult* response) {
     SCOPED_TIMER(_add_batch_timer);
     COUNTER_UPDATE(_add_batch_times, 1);
+    SCOPED_ATTACH_TASK(_resource_ctx);
     int64_t index_id = request.index_id();
     // 1. get tablets channel
     std::shared_ptr<BaseTabletsChannel> channel;
diff --git a/be/src/runtime/load_channel.h b/be/src/runtime/load_channel.h
index 8db5ea50993..31164222a95 100644
--- a/be/src/runtime/load_channel.h
+++ b/be/src/runtime/load_channel.h
@@ -69,8 +69,6 @@ public:
 
     bool is_high_priority() const { return _is_high_priority; }
 
-    std::shared_ptr<ResourceContext> resource_ctx() const { return 
_resource_ctx; }
-
     WorkloadGroupPtr workload_group() const { return 
_resource_ctx->workload_group(); }
 
     RuntimeProfile::Counter* get_mgr_add_batch_timer() { return 
_mgr_add_batch_timer; }
diff --git a/be/src/runtime/load_channel_mgr.cpp 
b/be/src/runtime/load_channel_mgr.cpp
index 0bf1bf18670..55db6564488 100644
--- a/be/src/runtime/load_channel_mgr.cpp
+++ b/be/src/runtime/load_channel_mgr.cpp
@@ -144,7 +144,6 @@ Status LoadChannelMgr::add_batch(const 
PTabletWriterAddBlockRequest& request,
         return status;
     }
     SCOPED_TIMER(channel->get_mgr_add_batch_timer());
-    SCOPED_ATTACH_TASK(channel->resource_ctx());
 
     if (!channel->is_high_priority()) {
         // 2. check if mem consumption exceed limit
diff --git a/be/src/runtime/thread_context.cpp 
b/be/src/runtime/thread_context.cpp
index 266515ae2a6..7db5c06d1a9 100644
--- a/be/src/runtime/thread_context.cpp
+++ b/be/src/runtime/thread_context.cpp
@@ -57,7 +57,6 @@ AttachTask::~AttachTask() {
 
 SwitchResourceContext::SwitchResourceContext(const 
std::shared_ptr<ResourceContext>& rc) {
     DCHECK(rc != nullptr);
-    DCHECK(thread_context()->is_attach_task());
     doris::ThreadLocalHandle::create_thread_local_if_not_exits();
     if (rc != thread_context()->resource_ctx()) {
         signal::set_signal_task_id(rc->task_controller()->task_id());


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to