This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 4cb80c5733 [memtracker]fix fix_memtracker_performance_ (#10629) 4cb80c5733 is described below commit 4cb80c5733a1a70a016cee8ab902f8d38f6acc7f Author: Kidd <107781942+k-i-...@users.noreply.github.com> AuthorDate: Mon Jul 11 08:35:05 2022 +0800 [memtracker]fix fix_memtracker_performance_ (#10629) --- be/src/common/config.h | 2 +- be/src/runtime/exec_env.h | 3 - be/src/runtime/load_channel.h | 3 +- be/src/runtime/mem_tracker.cpp | 2 +- be/src/runtime/tcmalloc_hook.h | 30 +++++-- be/src/runtime/thread_context.cpp | 15 ++-- be/src/runtime/thread_context.h | 136 ++++++++++++++------------------ be/src/runtime/thread_mem_tracker_mgr.h | 9 +-- 8 files changed, 94 insertions(+), 106 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index d212617867..a9ff1624f9 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -658,7 +658,7 @@ CONF_mInt16(mem_tracker_level, "0"); // smaller than this value will continue to accumulate. specified as number of bytes. // Decreasing this value will increase the frequency of consume/release. // Increasing this value will cause MemTracker statistics to be inaccurate. -CONF_mInt32(mem_tracker_consume_min_size_bytes, "2097152"); +CONF_mInt32(mem_tracker_consume_min_size_bytes, "4194304"); // When MemTracker is a negative value, it is considered that a memory leak has occurred, // but the actual MemTracker records inaccurately will also cause a negative value, diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index d37e7e2ae1..7a02dae866 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -73,8 +73,6 @@ class ClientCache; class HeartbeatFlags; -static bool exec_env_existed = false; - // Execution environment for queries/plan fragments. // Contains all required global structures, and handles to // singleton services. Clients must call StartServices exactly @@ -90,7 +88,6 @@ public: /// we return the most recently created instance. static ExecEnv* GetInstance() { static ExecEnv s_exec_env; - exec_env_existed = true; return &s_exec_env; } diff --git a/be/src/runtime/load_channel.h b/be/src/runtime/load_channel.h index 20ef476dd9..91005a6b89 100644 --- a/be/src/runtime/load_channel.h +++ b/be/src/runtime/load_channel.h @@ -40,7 +40,8 @@ class Cache; class LoadChannel { public: LoadChannel(const UniqueId& load_id, std::shared_ptr<MemTracker>& mem_tracker, - int64_t timeout_s, bool is_high_priority, const std::string& sender_ip, bool is_ve); + int64_t timeout_s, bool is_high_priority, const std::string& sender_ip, + bool is_vec); ~LoadChannel(); // open a new load channel if not exist diff --git a/be/src/runtime/mem_tracker.cpp b/be/src/runtime/mem_tracker.cpp index d53e975d64..71e89f8b1e 100644 --- a/be/src/runtime/mem_tracker.cpp +++ b/be/src/runtime/mem_tracker.cpp @@ -185,7 +185,7 @@ void MemTracker::init_virtual() { MemTracker::~MemTracker() { consume(_untracked_mem.exchange(0)); // before memory_leak_check // TCMalloc hook will be triggered during destructor memtracker, may cause crash. - if (_label == "Process") STOP_THREAD_LOCAL_MEM_TRACKER(false); + if (_label == "Process") doris::thread_local_ctx._init = false; if (!_virtual && config::memory_leak_detection) MemTracker::memory_leak_check(this); if (!_virtual && parent()) { // Do not call release on the parent tracker to avoid repeated releases. diff --git a/be/src/runtime/tcmalloc_hook.h b/be/src/runtime/tcmalloc_hook.h index dd0e1c788f..8b3a0290ed 100644 --- a/be/src/runtime/tcmalloc_hook.h +++ b/be/src/runtime/tcmalloc_hook.h @@ -37,18 +37,32 @@ // destructor to control the behavior of consume can lead to unexpected behavior, // like this: if (LIKELY(doris::start_thread_mem_tracker)) { void new_hook(const void* ptr, size_t size) { - if (doris::thread_local_ctx._init) { - doris::tls_ctx()->consume_mem(tc_nallocx(size, 0)); - } else if (doris::exec_env_existed && doris::ExecEnv::GetInstance()->initialized()) { - doris::MemTracker::get_process_tracker()->consume(tc_nallocx(size, 0)); + if (doris::btls_key != doris::EMPTY_BTLS_KEY && doris::bthread_tls != nullptr) { + // Currently in bthread, consume thread context mem tracker in bthread tls. + if (doris::btls_key != doris::bthread_tls_key) { + // pthread switch occurs, updating bthread_tls and bthread_tls_key cached in pthread tls. + doris::bthread_tls = + static_cast<doris::ThreadContext*>(bthread_getspecific(doris::btls_key)); + doris::bthread_tls_key = doris::btls_key; + } + doris::bthread_tls->_thread_mem_tracker_mgr->cache_consume(tc_nallocx(size, 0)); + } else if (doris::thread_local_ctx._init) { + doris::thread_local_ctx._tls->_thread_mem_tracker_mgr->cache_consume(tc_nallocx(size, 0)); } } void delete_hook(const void* ptr) { - if (doris::thread_local_ctx._init) { - doris::tls_ctx()->release_mem(tc_malloc_size(const_cast<void*>(ptr))); - } else if (doris::exec_env_existed && doris::ExecEnv::GetInstance()->initialized()) { - doris::MemTracker::get_process_tracker()->release(tc_malloc_size(const_cast<void*>(ptr))); + if (doris::btls_key != doris::EMPTY_BTLS_KEY && doris::bthread_tls != nullptr) { + if (doris::btls_key != doris::bthread_tls_key) { + doris::bthread_tls = + static_cast<doris::ThreadContext*>(bthread_getspecific(doris::btls_key)); + doris::bthread_tls_key = doris::btls_key; + } + doris::bthread_tls->_thread_mem_tracker_mgr->cache_consume( + -tc_malloc_size(const_cast<void*>(ptr))); + } else if (doris::thread_local_ctx._init) { + doris::thread_local_ctx._tls->_thread_mem_tracker_mgr->cache_consume( + -tc_malloc_size(const_cast<void*>(ptr))); } } diff --git a/be/src/runtime/thread_context.cpp b/be/src/runtime/thread_context.cpp index a439b4ca8f..63d84cf21d 100644 --- a/be/src/runtime/thread_context.cpp +++ b/be/src/runtime/thread_context.cpp @@ -22,15 +22,10 @@ namespace doris { -DEFINE_STATIC_THREAD_LOCAL(ThreadContext, ThreadContextPtr, thread_local_ctx); +DEFINE_STATIC_THREAD_LOCAL(ThreadContext, ThreadContextPtr, _tls); ThreadContextPtr::ThreadContextPtr() { - INIT_STATIC_THREAD_LOCAL(ThreadContext, thread_local_ctx); - _init = true; -} - -ThreadContext* ThreadContextPtr::get() { - return thread_local_ctx; + INIT_STATIC_THREAD_LOCAL(ThreadContext, _tls); } AttachTaskThread::AttachTaskThread(const ThreadContext::TaskType& type, const std::string& task_id, @@ -170,8 +165,10 @@ SwitchBthread::SwitchBthread() { DCHECK(tls->type() == ThreadContext::TaskType::UNKNOWN); tls->_thread_mem_tracker_mgr->clear_untracked_mems(); } - tls->_thread_mem_tracker_mgr->init(); + tls->init(); tls->set_type(ThreadContext::TaskType::BRPC); + bthread_tls_key = btls_key; + bthread_tls = tls; #endif } @@ -181,6 +178,8 @@ SwitchBthread::~SwitchBthread() { tls->_thread_mem_tracker_mgr->clear_untracked_mems(); tls->_thread_mem_tracker_mgr->init(); tls->set_type(ThreadContext::TaskType::UNKNOWN); + bthread_tls = nullptr; + bthread_tls_key = EMPTY_BTLS_KEY; #ifndef NDEBUG DorisMetrics::instance()->switch_bthread_count->increment(1); #endif // NDEBUG diff --git a/be/src/runtime/thread_context.h b/be/src/runtime/thread_context.h index bf347e0a28..b2bddb2d21 100644 --- a/be/src/runtime/thread_context.h +++ b/be/src/runtime/thread_context.h @@ -32,11 +32,6 @@ // Attach to task when thread starts #define SCOPED_ATTACH_TASK_THREAD(type, ...) \ auto VARNAME_LINENUM(attach_task_thread) = AttachTaskThread(type, ##__VA_ARGS__) -// Be careful to stop the thread mem tracker, because the actual order of malloc and free memory -// may be different from the order of execution of instructions, which will cause the position of -// the memory track to be unexpected. -#define STOP_THREAD_LOCAL_MEM_TRACKER(scope) \ - auto VARNAME_LINENUM(stop_tracker) = doris::StopThreadMemTracker(scope) // Switch thread mem tracker during task execution. // After the non-query thread switches the mem tracker, if the thread will not switch the mem // tracker again in the short term, can consider manually clear_untracked_mems. @@ -87,8 +82,50 @@ namespace doris { class TUniqueId; +class ThreadContext; extern bthread_key_t btls_key; +static const bthread_key_t EMPTY_BTLS_KEY = {0, 0}; + +// Using gcc11 compiles thread_local variable on lower versions of GLIBC will report an error, +// see https://github.com/apache/doris/pull/7911 +// +// If we want to avoid this error, +// 1. For non-trivial variables in thread_local, such as std::string, you need to store them as pointers to +// ensure that thread_local is trivial, these non-trivial pointers will uniformly call destructors elsewhere. +// 2. The default destructor of the thread_local variable cannot be overridden. +// +// This is difficult to implement. Because the destructor is not overwritten, it means that the outside cannot +// be notified when the thread terminates, and the non-trivial pointers in thread_local cannot be released in time. +// The func provided by pthread and std::thread doesn't help either. +// +// So, kudu Class-scoped static thread local implementation was introduced. Solve the above problem by +// Thread-scoped thread local + Class-scoped thread local. +// +// This may look very trick, but it's the best way I can find. +// +// refer to: +// https://gcc.gnu.org/onlinedocs/gcc-3.3.1/gcc/Thread-Local.html +// https://stackoverflow.com/questions/12049684/ +// https://sourceware.org/glibc/wiki/Destructor%20support%20for%20thread_local%20variables +// https://www.jianshu.com/p/756240e837dd +// https://man7.org/linux/man-pages/man3/pthread_tryjoin_np.3.html +class ThreadContextPtr { +public: + ThreadContextPtr(); + // Cannot add destructor `~ThreadContextPtr`, otherwise it will no longer be of type POD, the reason is as above. + + // TCMalloc hook is triggered during ThreadContext construction, which may lead to deadlock. + bool _init = false; + + DECLARE_STATIC_THREAD_LOCAL(ThreadContext, _tls); +}; + +inline thread_local ThreadContextPtr thread_local_ctx; +// To avoid performance problems caused by frequently calling `bthread_getspecific` to obtain bthread TLS +// in tcmalloc hook, cache the key and value of bthread TLS in pthread TLS. +inline thread_local ThreadContext* bthread_tls; +inline thread_local bthread_key_t bthread_tls_key; // The thread context saves some info about a working thread. // 2 required info: @@ -113,10 +150,25 @@ public: "STORAGE"}; public: - ThreadContext() : _type(TaskType::UNKNOWN) { + ThreadContext() { _thread_mem_tracker_mgr.reset(new ThreadMemTrackerMgr()); + init(); + thread_local_ctx._init = true; + } + + ~ThreadContext() { + // Restore to the memory state before _init=true to ensure accurate overall memory statistics. + // Thereby ensuring that the memory alloc size is not tracked during the initialization of the + // ThreadContext before `_init = true in ThreadContextPtr()`, + // Equal to the size of the memory release that is not tracked during the destruction of the + // ThreadContext after `_init = false in ~ThreadContextPtr()`, + init(); + thread_local_ctx._init = false; + } + + void init() { + _type = TaskType::UNKNOWN; _thread_mem_tracker_mgr->init(); - start_thread_mem_tracker = true; _thread_id = get_thread_id(); } @@ -154,22 +206,6 @@ public: return ss.str(); } - void consume_mem(int64_t size) { - if (start_thread_mem_tracker) { - _thread_mem_tracker_mgr->cache_consume(size); - } else { - MemTracker::get_process_tracker()->consume(size); - } - } - - void release_mem(int64_t size) { - if (start_thread_mem_tracker) { - _thread_mem_tracker_mgr->cache_consume(-size); - } else { - MemTracker::get_process_tracker()->release(size); - } - } - // After _thread_mem_tracker_mgr is initialized, the current thread TCMalloc Hook starts to // consume/release mem_tracker. // Note that the use of shared_ptr will cause a crash. The guess is that there is an @@ -185,50 +221,12 @@ private: TUniqueId _fragment_instance_id; }; -// Using gcc11 compiles thread_local variable on lower versions of GLIBC will report an error, -// see https://github.com/apache/doris/pull/7911 -// -// If we want to avoid this error, -// 1. For non-trivial variables in thread_local, such as std::string, you need to store them as pointers to -// ensure that thread_local is trivial, these non-trivial pointers will uniformly call destructors elsewhere. -// 2. The default destructor of the thread_local variable cannot be overridden. -// -// This is difficult to implement. Because the destructor is not overwritten, it means that the outside cannot -// be notified when the thread terminates, and the non-trivial pointers in thread_local cannot be released in time. -// The func provided by pthread and std::thread doesn't help either. -// -// So, kudu Class-scoped static thread local implementation was introduced. Solve the above problem by -// Thread-scoped thread local + Class-scoped thread local. -// -// This may look very trick, but it's the best way I can find. -// -// refer to: -// https://gcc.gnu.org/onlinedocs/gcc-3.3.1/gcc/Thread-Local.html -// https://stackoverflow.com/questions/12049684/ -// https://sourceware.org/glibc/wiki/Destructor%20support%20for%20thread_local%20variables -// https://www.jianshu.com/p/756240e837dd -// https://man7.org/linux/man-pages/man3/pthread_tryjoin_np.3.html -class ThreadContextPtr { -public: - ThreadContextPtr(); - - ThreadContext* get(); - - // TCMalloc hook is triggered during ThreadContext construction, which may lead to deadlock. - bool _init = false; - -private: - DECLARE_STATIC_THREAD_LOCAL(ThreadContext, thread_local_ctx); -}; - -inline thread_local ThreadContextPtr thread_local_ctx; - static ThreadContext* tls_ctx() { ThreadContext* tls = static_cast<ThreadContext*>(bthread_getspecific(btls_key)); if (tls != nullptr) { return tls; } else { - return thread_local_ctx.get(); + return thread_local_ctx._tls; } } @@ -266,20 +264,6 @@ public: ~AttachTaskThread(); }; -class StopThreadMemTracker { -public: - explicit StopThreadMemTracker(const bool scope = true) : _scope(scope) { - start_thread_mem_tracker = false; - } - - ~StopThreadMemTracker() { - if (_scope == true) start_thread_mem_tracker = true; - } - -private: - bool _scope = true; -}; - template <bool Existed> class SwitchThreadMemTracker { public: diff --git a/be/src/runtime/thread_mem_tracker_mgr.h b/be/src/runtime/thread_mem_tracker_mgr.h index ffc6fdc01f..ada4651ed3 100644 --- a/be/src/runtime/thread_mem_tracker_mgr.h +++ b/be/src/runtime/thread_mem_tracker_mgr.h @@ -48,13 +48,6 @@ struct ConsumeErrCallBackInfo { } }; -// If there is a memory new/delete operation in the consume method, it may enter infinite recursion. -// Note: After the tracker is stopped, the memory alloc in the consume method should be released in time, -// otherwise the MemTracker statistics will be inaccurate. -// In some cases, we want to turn off thread automatic memory statistics, manually call consume. -// In addition, when ~RootTracker, TCMalloc delete hook release RootTracker will crash. -inline thread_local bool start_thread_mem_tracker = false; - // TCMalloc new/delete Hook is counted in the memory_tracker of the current thread. // // In the original design, the MemTracker consume method is called before the memory is allocated. @@ -72,7 +65,6 @@ public: _mem_trackers.clear(); _untracked_mems.clear(); _mem_tracker_labels.clear(); - start_thread_mem_tracker = false; } // After thread initialization, calling `init` again must call `clear_untracked_mems` first @@ -177,6 +169,7 @@ private: phmap::flat_hash_map<int64_t, std::string> _mem_tracker_labels; // If true, call memtracker try_consume, otherwise call consume. bool _check_limit; + // If there is a memory new/delete operation in the consume method, it may enter infinite recursion. bool _stop_consume = false; int64_t _tracker_id; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org