xinyiZzz commented on code in PR #19802: URL: https://github.com/apache/doris/pull/19802#discussion_r1198523830
########## be/src/runtime/task_group/task_group.cpp: ########## @@ -132,17 +148,23 @@ void TaskGroup::remove_mem_tracker_limiter(std::shared_ptr<MemTrackerLimiter> me } int64_t TaskGroup::memory_limit_gc() { - std::string name; - int64_t memory_limit; - { - std::shared_lock<std::shared_mutex> rl {_mutex}; - name = _name; - memory_limit = _memory_limit; - } - return MemTrackerLimiter::tg_memory_limit_gc(_id, name, memory_limit, + const auto& [memory_limit, name] = _memory_limit_and_name(); + auto used = memory_used(); + return MemTrackerLimiter::tg_memory_limit_gc(used - memory_limit, used, _id, name, memory_limit, _mem_tracker_limiter_pool); } +int64_t TaskGroup::soft_memory_limit_gc(int64_t request_free_memory, int64_t used_memory) { Review Comment: Add comment, the difference between `soft_memory_limit_gc` and `memory_limit_gc` is that `soft_memory_limit_gc` request_free_memory <= used - memory_limit maybe can change name ########## be/src/util/mem_info.cpp: ########## @@ -215,6 +221,52 @@ bool MemInfo::process_full_gc() { return false; } +int64_t MemInfo::tg_hard_memory_limit_gc() { + std::vector<taskgroup::TaskGroupPtr> task_groups; + taskgroup::TaskGroupManager::instance()->get_resource_groups( + [](const taskgroup::TaskGroupPtr& task_group) { + return !task_group->enable_overcommit(); + }, + &task_groups); + + int64_t total_free_memory = 0; + for (const auto& task_group : task_groups) { + total_free_memory += task_group->memory_limit_gc(); + } + return total_free_memory; +} + +int64_t MemInfo::tg_soft_memory_limit_gc(int64_t request_free_memory) { + std::vector<taskgroup::TaskGroupPtr> task_groups; + taskgroup::TaskGroupManager::instance()->get_resource_groups( + [](const taskgroup::TaskGroupPtr& task_group) { + return task_group->enable_overcommit(); + }, + &task_groups); + + int64_t total_exceeded_memory = 0; + std::vector<int64_t> used_memorys; + std::vector<int64_t> exceeded_memorys; + for (const auto& task_group : task_groups) { + auto used_memory = task_group->memory_used(); + auto exceeded = used_memory - task_group->memory_limit(); + auto exceeded_memory = exceeded > 0 ? exceeded : 0; + total_exceeded_memory += exceeded_memory; + used_memorys.emplace_back(used_memory); + exceeded_memorys.emplace_back(exceeded_memory); + } + + int64_t total_free_memory = 0; + for (int i = 0; i < task_groups.size(); ++i) { + // Use resource group exceeded memory as a weight + int64_t tg_need_free_memory = static_cast<double>(exceeded_memorys[i]) / Review Comment: maybe `total_exceeded_memory > request_free_memory ? static_cast<double>(exceeded_memorys[i]) : static_cast<double>(exceeded_memorys[i]) / total_exceeded_memory * request_free_memory` ########## be/src/runtime/memory/mem_tracker_limiter.cpp: ########## @@ -487,21 +487,14 @@ int64_t MemTrackerLimiter::free_top_overcommit_query( } int64_t MemTrackerLimiter::tg_memory_limit_gc( - uint64_t id, const std::string& name, int64_t memory_limit, + int64_t need_free_mem, int64_t used_memory, uint64_t id, const std::string& name, + int64_t memory_limit, std::vector<taskgroup::TgTrackerLimiterGroup>& tracker_limiter_groups) { - int64_t used_memory = 0; - for (auto& mem_tracker_group : tracker_limiter_groups) { - std::lock_guard<std::mutex> l(mem_tracker_group.group_lock); - for (const auto& tracker : mem_tracker_group.trackers) { - used_memory += tracker->consumption(); - } - } - - if (used_memory <= memory_limit) { + need_free_mem = std::min(need_free_mem, used_memory - memory_limit); Review Comment: Whether the free memory size can be determined in `MemInfo::tg_soft_memory_limit_gc` ########## be/src/util/mem_info.cpp: ########## @@ -144,6 +146,8 @@ bool MemInfo::process_minor_gc() { // TODO add freed_mem SegmentLoader::instance()->prune(); + freed_mem += tg_soft_memory_limit_gc(_s_process_minor_gc_size - freed_mem); Review Comment: Add more detailed comments for `process_minor_gc` and `process_full_gc`. GC order in each case. ########## be/src/util/mem_info.cpp: ########## @@ -187,6 +191,8 @@ bool MemInfo::process_full_gc() { } } + freed_mem += tg_soft_memory_limit_gc(_s_process_minor_gc_size - freed_mem); Review Comment: `_s_process_minor_gc_size` changed to `_s_process_full_gc_size` ########## be/src/util/mem_info.cpp: ########## @@ -144,6 +146,8 @@ bool MemInfo::process_minor_gc() { // TODO add freed_mem SegmentLoader::instance()->prune(); + freed_mem += tg_soft_memory_limit_gc(_s_process_minor_gc_size - freed_mem); Review Comment: 现在memory exceed limit的行为是这样么,辛苦确认下: group-A没有overcommit,只有一个query-a,query—a已经overcommit。 group-B已经overcommit,其中query-b已经overcommit,其他query都没有overcommit。 进程内存超限时 minor gc, 会先cancel query-b,再按照内存从大到小cancel group-B的其他query,直到group-B不再overcommit,然后再cancel query-a -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org