This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 25c864dfea3 [fix](cloud) Fix balanced_tablets_shards leak memory and 
fix some file cache case (#59093)
25c864dfea3 is described below

commit 25c864dfea3cc71809bdc04a83dd783c78671a38
Author: deardeng <[email protected]>
AuthorDate: Tue Dec 23 02:33:59 2025 +0800

    [fix](cloud) Fix balanced_tablets_shards leak memory and fix some file 
cache case (#59093)
    
    ### What problem does this PR solve?
    1. When fixing the issue of CloudWarmUpManager leaking a small amount of
    memory and failing to remove (tablet_id, peer_addr) from memory during
    warm-up balance when file cache meta is not retrieved from source, the
    issue was resolved.
    2. Fixed several file cache Docker cases affected by small file
    optimization PR(https://github.com/apache/doris/pull/57770)
    
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [x] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [x] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [x] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/cloud/cloud_warm_up_manager.cpp             | 31 ++++++++++++++++++++++
 be/src/cloud/cloud_warm_up_manager.h               |  2 ++
 be/src/io/cache/block_file_cache_downloader.cpp    | 29 +-------------------
 ...est_balance_use_compute_group_properties.groovy |  3 ++-
 .../cloud_p0/balance/test_balance_warm_up.groovy   |  3 ++-
 .../test_balance_warm_up_sync_global_config.groovy |  3 ++-
 .../test_balance_warm_up_task_abnormal.groovy      |  3 ++-
 .../test_balance_warm_up_use_peer_cache.groovy     |  3 ++-
 ...e_warm_up_with_compaction_use_peer_cache.groovy |  3 ++-
 .../balance/test_peer_read_async_warmup.groovy     |  1 +
 .../tablets/test_clean_stale_rs_file_cache.groovy  |  3 ++-
 .../test_clean_stale_rs_index_file_cache.groovy    |  3 ++-
 .../test_clean_tablet_when_drop_force_table.groovy |  3 ++-
 .../test_clean_tablet_when_rebalance.groovy        |  3 ++-
 14 files changed, 55 insertions(+), 38 deletions(-)

diff --git a/be/src/cloud/cloud_warm_up_manager.cpp 
b/be/src/cloud/cloud_warm_up_manager.cpp
index 2e00592d0a3..272b690facf 100644
--- a/be/src/cloud/cloud_warm_up_manager.cpp
+++ b/be/src/cloud/cloud_warm_up_manager.cpp
@@ -19,6 +19,8 @@
 
 #include <bthread/condition_variable.h>
 #include <bthread/mutex.h>
+#include <bthread/unstable.h>
+#include <butil/time.h>
 #include <bvar/bvar.h>
 #include <bvar/reducer.h>
 
@@ -795,10 +797,39 @@ void CloudWarmUpManager::record_balanced_tablet(int64_t 
tablet_id, const std::st
     meta.brpc_port = brpc_port;
     shard.tablets.emplace(tablet_id, std::move(meta));
     g_balance_tablet_be_mapping_size << 1;
+    schedule_remove_balanced_tablet(tablet_id);
     VLOG_DEBUG << "Recorded balanced warm up cache tablet: tablet_id=" << 
tablet_id
                << ", host=" << host << ":" << brpc_port;
 }
 
+void CloudWarmUpManager::schedule_remove_balanced_tablet(int64_t tablet_id) {
+    // Use std::make_unique to avoid raw pointer allocation
+    auto tablet_id_ptr = std::make_unique<int64_t>(tablet_id);
+    unsigned long expired_ms = g_tablet_report_inactive_duration_ms;
+    if (doris::config::cache_read_from_peer_expired_seconds > 0 &&
+        doris::config::cache_read_from_peer_expired_seconds <=
+                g_tablet_report_inactive_duration_ms / 1000) {
+        expired_ms = doris::config::cache_read_from_peer_expired_seconds * 
1000;
+    }
+    bthread_timer_t timer_id;
+    // ATTN: The timer callback will reclaim ownership of the tablet_id_ptr, 
so we need to release it after the timer is added.
+    if (const int rc = bthread_timer_add(&timer_id, 
butil::milliseconds_from_now(expired_ms),
+                                         clean_up_expired_mappings, 
tablet_id_ptr.get());
+        rc == 0) {
+        tablet_id_ptr.release();
+    } else {
+        LOG(WARNING) << "Fail to add timer for clean up expired mappings for 
tablet_id="
+                     << tablet_id << " rc=" << rc;
+    }
+}
+
+void CloudWarmUpManager::clean_up_expired_mappings(void* arg) {
+    std::unique_ptr<int64_t> tid(static_cast<int64_t*>(arg));
+    auto& manager = 
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
+    manager.remove_balanced_tablet(*tid);
+    VLOG_DEBUG << "Removed expired balanced warm up cache tablet: tablet_id=" 
<< *tid;
+}
+
 std::optional<std::pair<std::string, int32_t>> 
CloudWarmUpManager::get_balanced_tablet_info(
         int64_t tablet_id) {
     auto& shard = get_shard(tablet_id);
diff --git a/be/src/cloud/cloud_warm_up_manager.h 
b/be/src/cloud/cloud_warm_up_manager.h
index 8725dc06939..f5b00394a27 100644
--- a/be/src/cloud/cloud_warm_up_manager.h
+++ b/be/src/cloud/cloud_warm_up_manager.h
@@ -98,6 +98,8 @@ public:
     std::unordered_map<int64_t, std::pair<std::string, int32_t>> 
get_all_balanced_tablets() const;
 
 private:
+    void schedule_remove_balanced_tablet(int64_t tablet_id);
+    static void clean_up_expired_mappings(void* arg);
     void handle_jobs();
 
     Status _do_warm_up_rowset(RowsetMeta& rs_meta, std::vector<TReplicaInfo>& 
replicas,
diff --git a/be/src/io/cache/block_file_cache_downloader.cpp 
b/be/src/io/cache/block_file_cache_downloader.cpp
index 27f046140b3..5113c7f8b3e 100644
--- a/be/src/io/cache/block_file_cache_downloader.cpp
+++ b/be/src/io/cache/block_file_cache_downloader.cpp
@@ -173,14 +173,6 @@ std::unordered_map<std::string, RowsetMetaSharedPtr> 
snapshot_rs_metas(BaseTable
     return id_to_rowset_meta_map;
 }
 
-static void clean_up_expired_mappings(void* arg) {
-    // Reclaim ownership with unique_ptr for automatic memory management
-    std::unique_ptr<int64_t> tablet_id(static_cast<int64_t*>(arg));
-    auto& manager = 
ExecEnv::GetInstance()->storage_engine().to_cloud().cloud_warm_up_manager();
-    manager.remove_balanced_tablet(*tablet_id);
-    VLOG_DEBUG << "Removed expired balanced warm up cache tablet: tablet_id=" 
<< *tablet_id;
-}
-
 void FileCacheBlockDownloader::download_file_cache_block(
         const DownloadTask::FileCacheBlockMetaVec& metas) {
     std::unordered_set<int64_t> synced_tablets;
@@ -236,27 +228,8 @@ void FileCacheBlockDownloader::download_file_cache_block(
                                << "]";
                 }
             }
-            // Use std::make_unique to avoid raw pointer allocation
-            auto tablet_id_ptr = std::make_unique<int64_t>(tablet_id);
-            unsigned long expired_ms = g_tablet_report_inactive_duration_ms;
-            if (doris::config::cache_read_from_peer_expired_seconds > 0 &&
-                doris::config::cache_read_from_peer_expired_seconds <=
-                        g_tablet_report_inactive_duration_ms / 1000) {
-                expired_ms = 
doris::config::cache_read_from_peer_expired_seconds * 1000;
-            }
-            bthread_timer_t timer_id;
-            // ATTN: The timer callback will reclaim ownership of the 
tablet_id_ptr, so we need to release it after the timer is added.
-            if (const int rc =
-                        bthread_timer_add(&timer_id, 
butil::milliseconds_from_now(expired_ms),
-                                          clean_up_expired_mappings, 
tablet_id_ptr.get());
-                rc == 0) {
-                tablet_id_ptr.release();
-            } else {
-                LOG(WARNING) << "Fail to add timer for clean up expired 
mappings for tablet_id="
-                             << tablet_id << " rc=" << rc;
-            }
             LOG(INFO) << "download_file_cache_block: download_done, 
tablet_Id=" << tablet_id
-                      << " status=" << st.to_string() << " expired_ms=" << 
expired_ms;
+                      << " status=" << st.to_string();
         };
 
         std::string path;
diff --git 
a/regression-test/suites/cloud_p0/balance/test_balance_use_compute_group_properties.groovy
 
b/regression-test/suites/cloud_p0/balance/test_balance_use_compute_group_properties.groovy
index f96119ec29b..de54b93f2d8 100644
--- 
a/regression-test/suites/cloud_p0/balance/test_balance_use_compute_group_properties.groovy
+++ 
b/regression-test/suites/cloud_p0/balance/test_balance_use_compute_group_properties.groovy
@@ -36,7 +36,8 @@ suite('test_balance_use_compute_group_properties', 'docker') {
         'report_tablet_interval_seconds=1',
         'schedule_sync_tablets_interval_s=18000',
         'disable_auto_compaction=true',
-        'sys_log_verbose_modules=*'
+        'sys_log_verbose_modules=*',
+        'enable_packed_file=false',
     ]
     options.setFeNum(1)
     options.setBeNum(1)
diff --git 
a/regression-test/suites/cloud_p0/balance/test_balance_warm_up.groovy 
b/regression-test/suites/cloud_p0/balance/test_balance_warm_up.groovy
index db28efafad8..037d54e1d6e 100644
--- a/regression-test/suites/cloud_p0/balance/test_balance_warm_up.groovy
+++ b/regression-test/suites/cloud_p0/balance/test_balance_warm_up.groovy
@@ -37,7 +37,8 @@ suite('test_balance_warm_up', 'docker') {
         'disable_auto_compaction=true',
         'sys_log_verbose_modules=*',
         'cache_read_from_peer_expired_seconds=100',
-        'enable_cache_read_from_peer=true'
+        'enable_cache_read_from_peer=true',
+        'enable_packed_file=false',
     ]
     options.setFeNum(1)
     options.setBeNum(1)
diff --git 
a/regression-test/suites/cloud_p0/balance/test_balance_warm_up_sync_global_config.groovy
 
b/regression-test/suites/cloud_p0/balance/test_balance_warm_up_sync_global_config.groovy
index 19c252fba85..f2af4b086a5 100644
--- 
a/regression-test/suites/cloud_p0/balance/test_balance_warm_up_sync_global_config.groovy
+++ 
b/regression-test/suites/cloud_p0/balance/test_balance_warm_up_sync_global_config.groovy
@@ -36,7 +36,8 @@ suite('test_balance_warm_up_sync_cache', 'docker') {
         'report_tablet_interval_seconds=1',
         'schedule_sync_tablets_interval_s=18000',
         'disable_auto_compaction=true',
-        'sys_log_verbose_modules=*'
+        'sys_log_verbose_modules=*',
+        'enable_packed_file=false',
     ]
     options.setFeNum(1)
     options.setBeNum(1)
diff --git 
a/regression-test/suites/cloud_p0/balance/test_balance_warm_up_task_abnormal.groovy
 
b/regression-test/suites/cloud_p0/balance/test_balance_warm_up_task_abnormal.groovy
index 0938126d61c..fe5dc84ef67 100644
--- 
a/regression-test/suites/cloud_p0/balance/test_balance_warm_up_task_abnormal.groovy
+++ 
b/regression-test/suites/cloud_p0/balance/test_balance_warm_up_task_abnormal.groovy
@@ -36,7 +36,8 @@ suite('test_balance_warm_up_task_abnormal', 'docker') {
         'report_tablet_interval_seconds=1',
         'schedule_sync_tablets_interval_s=18000',
         'disable_auto_compaction=true',
-        'sys_log_verbose_modules=*'
+        'sys_log_verbose_modules=*',
+        'enable_packed_file=false',
     ]
     options.setFeNum(1)
     options.setBeNum(1)
diff --git 
a/regression-test/suites/cloud_p0/balance/test_balance_warm_up_use_peer_cache.groovy
 
b/regression-test/suites/cloud_p0/balance/test_balance_warm_up_use_peer_cache.groovy
index f9202e702ed..4eeaac1b5e6 100644
--- 
a/regression-test/suites/cloud_p0/balance/test_balance_warm_up_use_peer_cache.groovy
+++ 
b/regression-test/suites/cloud_p0/balance/test_balance_warm_up_use_peer_cache.groovy
@@ -40,7 +40,8 @@ suite('test_balance_warm_up_use_peer_cache', 'docker') {
         'disable_auto_compaction=true',
         'sys_log_verbose_modules=*',
         'cache_read_from_peer_expired_seconds=100',
-        'enable_cache_read_from_peer=true'
+        'enable_cache_read_from_peer=true',
+        'enable_packed_file=false',
     ]
     options.setFeNum(1)
     options.setBeNum(1)
diff --git 
a/regression-test/suites/cloud_p0/balance/test_balance_warm_up_with_compaction_use_peer_cache.groovy
 
b/regression-test/suites/cloud_p0/balance/test_balance_warm_up_with_compaction_use_peer_cache.groovy
index be120221019..548cf1c3fc7 100644
--- 
a/regression-test/suites/cloud_p0/balance/test_balance_warm_up_with_compaction_use_peer_cache.groovy
+++ 
b/regression-test/suites/cloud_p0/balance/test_balance_warm_up_with_compaction_use_peer_cache.groovy
@@ -41,7 +41,8 @@ suite('test_balance_warm_up_with_compaction_use_peer_cache', 
'docker') {
         'sys_log_verbose_modules=*',
         'cumulative_compaction_min_deltas=5',
         'cache_read_from_peer_expired_seconds=100',
-        'enable_cache_read_from_peer=true'
+        'enable_cache_read_from_peer=true',
+        'enable_packed_file=false',
     ]
     options.setFeNum(1)
     options.setBeNum(1)
diff --git 
a/regression-test/suites/cloud_p0/balance/test_peer_read_async_warmup.groovy 
b/regression-test/suites/cloud_p0/balance/test_peer_read_async_warmup.groovy
index ba9a61409bb..8a3469ad8e1 100644
--- a/regression-test/suites/cloud_p0/balance/test_peer_read_async_warmup.groovy
+++ b/regression-test/suites/cloud_p0/balance/test_peer_read_async_warmup.groovy
@@ -39,6 +39,7 @@ suite('test_peer_read_async_warmup', 'docker') {
         'disable_auto_compaction=true',
         'sys_log_verbose_modules=*',
         'enable_cache_read_from_peer=true',
+        'enable_packed_file=false',
     ]
     options.setFeNum(1)
     options.setBeNum(1)
diff --git 
a/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy 
b/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy
index 807e51ae95f..e161b44ce0f 100644
--- 
a/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy
+++ 
b/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_file_cache.groovy
@@ -34,7 +34,8 @@ suite('test_clean_stale_rs_file_cache', 'docker') {
         'cumulative_compaction_min_deltas=5',
         'tablet_rowset_stale_sweep_by_size=false',
         'tablet_rowset_stale_sweep_time_sec=60',
-        'vacuum_stale_rowsets_interval_s=10'
+        'vacuum_stale_rowsets_interval_s=10',
+        'enable_packed_file=false',
     ]
     options.setFeNum(1)
     options.setBeNum(1)
diff --git 
a/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_index_file_cache.groovy
 
b/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_index_file_cache.groovy
index e99a5f96b36..59e13b1030b 100644
--- 
a/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_index_file_cache.groovy
+++ 
b/regression-test/suites/cloud_p0/tablets/test_clean_stale_rs_index_file_cache.groovy
@@ -34,7 +34,8 @@ suite('test_clean_stale_rs_index_file_cache', 'docker') {
         'cumulative_compaction_min_deltas=5',
         'tablet_rowset_stale_sweep_by_size=false',
         'tablet_rowset_stale_sweep_time_sec=60',
-        'vacuum_stale_rowsets_interval_s=10'
+        'vacuum_stale_rowsets_interval_s=10',
+        'enable_packed_file=false',
     ]
     options.setFeNum(1)
     options.setBeNum(1)
diff --git 
a/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_drop_force_table.groovy
 
b/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_drop_force_table.groovy
index ff8753a857e..b0e55bd8aef 100644
--- 
a/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_drop_force_table.groovy
+++ 
b/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_drop_force_table.groovy
@@ -34,7 +34,8 @@ suite('test_clean_tablet_when_drop_force_table', 'docker') {
         'report_tablet_interval_seconds=1',
         'write_buffer_size=10240',
         'write_buffer_size_for_agg=10240',
-        'sys_log_verbose_modules=task_worker_pool'
+        'sys_log_verbose_modules=task_worker_pool',
+        'enable_packed_file=false',
     ]
     options.setFeNum(3)
     options.setBeNum(3)
diff --git 
a/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_rebalance.groovy
 
b/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_rebalance.groovy
index 75c2c410a23..964ba517bb7 100644
--- 
a/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_rebalance.groovy
+++ 
b/regression-test/suites/cloud_p0/tablets/test_clean_tablet_when_rebalance.groovy
@@ -34,7 +34,8 @@ suite('test_clean_tablet_when_rebalance', 'docker') {
     options.beConfigs += [
         'report_tablet_interval_seconds=1',
         'write_buffer_size=10240',
-        'write_buffer_size_for_agg=10240'
+        'write_buffer_size_for_agg=10240',
+        'enable_packed_file=false',
     ]
     options.setFeNum(3)
     options.setBeNum(3)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to