This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 848fda6afd31e94f906ddba90625fc1d6e391cf2
Author: Hongkun Xu <xuhongkun...@163.com>
AuthorDate: Sun Jun 30 12:56:14 2024 +0800

    [fix](cgroup memory) Correct cgroup mem info cache (#36966)
    
    ## Proposed changes
    After upgrading to Doris 2.1.3, we noticed that the "sys available
    memory" in be.INFO continuously decreases until it falls below the
    warning water mark, leading to persistent garbage collection (GC)
    despite the actual memory usage being very low. In addition, the cache
    value in the cgroup mem info is always 0. Consequently, I identified an
    error in the calculation of available memory in cgroup memory:
    
    1. The memory information for cgroup memory is stored in the file
    "memory.stat" rather than "memory.meminfo" (in fact, the
    "memory.meminfo" file does not exist). You can see the files under the
    cgroup path in the attached screenshot1.
    2. The output content of "memory.stat" is shown in the screenshot1
    below.
    
    <img width="1720" alt="image"
    src="https://github.com/apache/doris/assets/38196564/e654322e-9bf4-4f5e-951f-99e101ebbf47">
    <img width="1364" alt="image"
    src="https://github.com/apache/doris/assets/38196564/02cf8899-7618-4d5f-bf59-68fa0c90ebf2">
    
    
    <!--Describe your changes.-->
    My change consists of two steps:
    1. Modified the file name for mem info in cgroup.
    2. Modified the process for extracting the cache from cgroup.
    
    Co-authored-by: Xinyi Zou <zouxiny...@gmail.com>
---
 be/src/util/cgroup_util.cpp | 2 +-
 be/src/util/mem_info.cpp    | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/be/src/util/cgroup_util.cpp b/be/src/util/cgroup_util.cpp
index a2c3e294e66..9ad78696a6f 100644
--- a/be/src/util/cgroup_util.cpp
+++ b/be/src/util/cgroup_util.cpp
@@ -184,7 +184,7 @@ Status CGroupUtil::find_cgroup_mem_info(std::string* 
file_path) {
     }
     string cgroup_path;
     RETURN_IF_ERROR(find_abs_cgroup_path("memory", &cgroup_path));
-    *file_path = cgroup_path + "/memory.meminfo";
+    *file_path = cgroup_path + "/memory.stat";
     return Status::OK();
 }
 
diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp
index fc5d5512f1a..45e609d7100 100644
--- a/be/src/util/mem_info.cpp
+++ b/be/src/util/mem_info.cpp
@@ -154,7 +154,7 @@ void MemInfo::refresh_proc_meminfo() {
                 if (fields.size() < 2) {
                     continue;
                 }
-                std::string key = fields[0].substr(0, fields[0].size() - 1);
+                std::string key = fields[0].substr(0, fields[0].size());
 
                 StringParser::ParseResult result;
                 auto mem_value = 
StringParser::string_to_int<int64_t>(fields[1].data(),
@@ -180,19 +180,19 @@ void MemInfo::refresh_proc_meminfo() {
             // 
https://serverfault.com/questions/902009/the-memory-usage-reported-in-cgroup-differs-from-the-free-command
             // memory.usage_in_bytes ~= free.used + free.(buff/cache) - (buff)
             // so, memory.usage_in_bytes - memory.meminfo["Cached"]
-            _s_cgroup_mem_usage = cgroup_mem_usage - 
_s_cgroup_mem_info_bytes["Cached"];
+            _s_cgroup_mem_usage = cgroup_mem_usage - 
_s_cgroup_mem_info_bytes["cache"];
             // wait 10s, 100 * 100ms, avoid too frequently.
             _s_cgroup_mem_refresh_wait_times = -100;
             LOG(INFO) << "Refresh cgroup memory win, refresh again after 10s, 
cgroup mem limit: "
                       << _s_cgroup_mem_limit << ", cgroup mem usage: " << 
_s_cgroup_mem_usage
-                      << ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["Cached"];
+                      << ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["cache"];
         } else {
             // find cgroup failed, wait 300s, 1000 * 100ms.
             _s_cgroup_mem_refresh_wait_times = -3000;
             LOG(INFO)
                     << "Refresh cgroup memory failed, refresh again after 
300s, cgroup mem limit: "
                     << _s_cgroup_mem_limit << ", cgroup mem usage: " << 
_s_cgroup_mem_usage
-                    << ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["Cached"];
+                    << ", cgroup mem info cached: " << 
_s_cgroup_mem_info_bytes["cache"];
         }
     } else {
         if (config::enable_use_cgroup_memory_info) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to