This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new d2a45c2ccb8 branch-4.1: [fix](filecache) reclaim expired tablet 
hotspot counters and compact sparse shards (#61834)
d2a45c2ccb8 is described below

commit d2a45c2ccb8e5683d59c4a3095a76aadcf982ef6
Author: zhengyu <[email protected]>
AuthorDate: Sat Mar 28 22:29:12 2026 +0800

    branch-4.1: [fix](filecache) reclaim expired tablet hotspot counters and 
compact sparse shards (#61834)
    
    TabletHotspot memory kept growing by about 1.8-2.7 GB per day because
    counters for cold tablets remained resident even after they no longer
    contributed to the hour/day/week hotspot windows. Their history deques
    kept consuming memory, and the sharded unordered_maps continued to hold
    expanded bucket arrays without a reclamation path.
    
    Fix this by running a maintenance pass after each hourly rollup to evict
    counters whose last access is older than the one-week window and whose
    current/day/week contributions are all zero. When a shard becomes sparse
    after eviction, rebuild the map to release bucket memory. Also add
    GC-focused diagnostics and unit tests to verify eviction, recreation,
    and shard compaction behavior.
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
        https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merges this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/cloud/cloud_tablet_hotspot.cpp          | 342 ++++++++++++++++++++++---
 be/src/cloud/cloud_tablet_hotspot.h            |  39 ++-
 be/test/cloud/cloud_tablet_hotspot_gc_test.cpp | 198 ++++++++++++++
 3 files changed, 539 insertions(+), 40 deletions(-)

diff --git a/be/src/cloud/cloud_tablet_hotspot.cpp 
b/be/src/cloud/cloud_tablet_hotspot.cpp
index e6129669690..27c64eeaeaf 100644
--- a/be/src/cloud/cloud_tablet_hotspot.cpp
+++ b/be/src/cloud/cloud_tablet_hotspot.cpp
@@ -17,33 +17,118 @@
 
 #include "cloud/cloud_tablet_hotspot.h"
 
+#include <algorithm>
 #include <chrono>
+#include <cmath>
+#include <cstdint>
 #include <mutex>
+#include <vector>
 
-#include "cloud/config.h"
-#include "runtime/exec_env.h"
-#include "storage/tablet/tablet_fwd.h"
+#include "runtime/memory/global_memory_arbitrator.h"
+#include "storage/tablet/base_tablet.h"
 
 namespace doris {
 
+namespace {
+
+using HotPartitionSnapshot =
+        std::unordered_map<TabletHotspotMapKey, std::unordered_map<int64_t, 
TabletHotspotMapValue>,
+                           MapKeyHash>;
+
+constexpr uint64_t kCountHeartbeatInterval = 500000;
+constexpr uint64_t kNewCounterLogInterval = 4096;
+constexpr size_t kCompactBucketMultiplier = 4;
+constexpr size_t kCompactEraseRatioDivisor = 4;
+
+using SystemTimePoint = std::chrono::system_clock::time_point;
+using SteadyClock = std::chrono::steady_clock;
+
+double bytes_to_mb(int64_t bytes) {
+    return static_cast<double>(bytes) / (1024.0 * 1024.0);
+}
+
+double ratio_or_zero(uint64_t numerator, uint64_t denominator) {
+    if (denominator == 0) {
+        return 0.0;
+    }
+    return static_cast<double>(numerator) / static_cast<double>(denominator);
+}
+
+int64_t process_memory_usage_for_diag() {
+#ifdef BE_TEST
+    return 0;
+#else
+    return GlobalMemoryArbitrator::process_memory_usage();
+#endif
+}
+
+uint64_t count_hot_partition_entries(const HotPartitionSnapshot& snapshot) {
+    uint64_t entries = 0;
+    for (const auto& [_, partition_to_value] : snapshot) {
+        entries += partition_to_value.size();
+    }
+    return entries;
+}
+
+bool should_compact_slot(size_t slot_size_before, size_t slot_size_after, 
size_t bucket_count_after,
+                         size_t erased_count) {
+    if (erased_count == 0) {
+        return false;
+    }
+    if (slot_size_after == 0) {
+        return true;
+    }
+    return bucket_count_after > (kCompactBucketMultiplier * slot_size_after) ||
+           (slot_size_before > 0 && erased_count * kCompactEraseRatioDivisor 
>= slot_size_before);
+}
+
+} // namespace
+
 void TabletHotspot::count(const BaseTablet& tablet) {
-    size_t slot_idx = tablet.tablet_id() % s_slot_size;
+    count(tablet.tablet_id(), tablet.table_id(), tablet.index_id(), 
tablet.partition_id());
+}
+
+void TabletHotspot::count(int64_t tablet_id, int64_t table_id, int64_t 
index_id,
+                          int64_t partition_id) {
+    const uint64_t count_calls_total =
+            _count_calls_total.fetch_add(1, std::memory_order_relaxed) + 1;
+
+    size_t slot_idx = tablet_id % s_slot_size;
     auto& slot = _tablets_hotspot[slot_idx];
-    std::lock_guard lock(slot.mtx);
-    HotspotCounterPtr counter;
-    if (auto iter = slot.map.find(tablet.tablet_id()); iter == slot.map.end()) 
{
-        counter = std::make_shared<HotspotCounter>(tablet.table_id(), 
tablet.index_id(),
-                                                   tablet.partition_id());
-        slot.map.insert(std::make_pair(tablet.tablet_id(), counter));
-    } else {
-        counter = iter->second;
+    bool should_log_new_counter = false;
+    uint64_t new_counter_total = 0;
+    {
+        std::lock_guard lock(slot.mtx);
+        HotspotCounterPtr counter;
+        if (auto iter = slot.map.find(tablet_id); iter == slot.map.end()) {
+            counter = std::make_shared<HotspotCounter>(table_id, index_id, 
partition_id);
+            slot.map.insert(std::make_pair(tablet_id, counter));
+            new_counter_total = _new_counter_total.fetch_add(1, 
std::memory_order_relaxed) + 1;
+            should_log_new_counter = (new_counter_total % 
kNewCounterLogInterval == 0);
+        } else {
+            counter = iter->second;
+            _existing_hit_total.fetch_add(1, std::memory_order_relaxed);
+        }
+        counter->last_access_time = std::chrono::system_clock::now();
+        counter->cur_counter.fetch_add(1, std::memory_order_relaxed);
+    }
+
+    if (should_log_new_counter) {
+        LOG(INFO) << "tablet_hotspot_diag new_counter_total=" << 
new_counter_total
+                  << " count_calls_total=" << count_calls_total
+                  << " existing_hit_total=" << 
_existing_hit_total.load(std::memory_order_relaxed);
+    }
+    if (count_calls_total % kCountHeartbeatInterval == 0) {
+        LOG(INFO) << "tablet_hotspot_diag count_heartbeat count_calls_total=" 
<< count_calls_total
+                  << " existing_hit_total=" << 
_existing_hit_total.load(std::memory_order_relaxed)
+                  << " new_counter_total=" << 
_new_counter_total.load(std::memory_order_relaxed);
     }
-    counter->last_access_time = std::chrono::system_clock::now();
-    counter->cur_counter++;
 }
 
-TabletHotspot::TabletHotspot() {
-    _counter_thread = std::thread(&TabletHotspot::make_dot_point, this);
+TabletHotspot::TabletHotspot(bool start_counter_thread) {
+    if (start_counter_thread) {
+        _counter_thread = std::thread(&TabletHotspot::make_dot_point, this);
+    }
 }
 
 TabletHotspot::~TabletHotspot() {
@@ -108,6 +193,21 @@ void get_return_partitions(
 }
 
 void TabletHotspot::get_top_n_hot_partition(std::vector<THotTableMessage>* 
hot_tables) {
+    const uint64_t call_id =
+            _get_top_n_hot_partition_call_total.fetch_add(1, 
std::memory_order_relaxed) + 1;
+    uint64_t last_day_tables_before = 0;
+    uint64_t last_day_entries_before = 0;
+    uint64_t last_week_tables_before = 0;
+    uint64_t last_week_entries_before = 0;
+    {
+        std::lock_guard lock(_last_partitions_mtx);
+        last_day_tables_before = _last_day_hot_partitions.size();
+        last_day_entries_before = 
count_hot_partition_entries(_last_day_hot_partitions);
+        last_week_tables_before = _last_week_hot_partitions.size();
+        last_week_entries_before = 
count_hot_partition_entries(_last_week_hot_partitions);
+    }
+    const int64_t process_mem_before = process_memory_usage_for_diag();
+
     // map<pair<table_id, index_id>, map<partition_id, value>> for day
     std::unordered_map<TabletHotspotMapKey, std::unordered_map<int64_t, 
TabletHotspotMapValue>,
                        MapKeyHash>
@@ -145,20 +245,52 @@ void 
TabletHotspot::get_top_n_hot_partition(std::vector<THotTableMessage>* hot_t
     });
     constexpr int N = 50;
     int return_partitions = 0;
+    const uint64_t day_tables_built = day_hot_partitions.size();
+    const uint64_t day_entries_built = 
count_hot_partition_entries(day_hot_partitions);
+    const uint64_t week_tables_built = week_hot_partitions.size();
+    const uint64_t week_entries_built = 
count_hot_partition_entries(week_hot_partitions);
+    uint64_t last_day_tables_after = 0;
+    uint64_t last_day_entries_after = 0;
+    uint64_t last_week_tables_after = 0;
+    uint64_t last_week_entries_after = 0;
 
-    std::unique_lock lock(_last_partitions_mtx);
-    get_return_partitions(day_hot_partitions, _last_day_hot_partitions, 
hot_tables,
-                          return_partitions, N);
-    get_return_partitions(week_hot_partitions, _last_week_hot_partitions, 
hot_tables,
-                          return_partitions, N);
+    {
+        std::unique_lock lock(_last_partitions_mtx);
+        get_return_partitions(day_hot_partitions, _last_day_hot_partitions, 
hot_tables,
+                              return_partitions, N);
+        get_return_partitions(week_hot_partitions, _last_week_hot_partitions, 
hot_tables,
+                              return_partitions, N);
 
-    _last_day_hot_partitions = std::move(day_hot_partitions);
-    _last_week_hot_partitions = std::move(week_hot_partitions);
+        _last_day_hot_partitions = std::move(day_hot_partitions);
+        _last_week_hot_partitions = std::move(week_hot_partitions);
+        last_day_tables_after = _last_day_hot_partitions.size();
+        last_day_entries_after = 
count_hot_partition_entries(_last_day_hot_partitions);
+        last_week_tables_after = _last_week_hot_partitions.size();
+        last_week_entries_after = 
count_hot_partition_entries(_last_week_hot_partitions);
+    }
+
+    const int64_t process_mem_after = process_memory_usage_for_diag();
+
+    LOG(INFO) << "tablet_hotspot_diag get_top_n_hot_partition call_id=" << 
call_id
+              << " day_tables_built=" << day_tables_built
+              << " day_entries_built=" << day_entries_built
+              << " week_tables_built=" << week_tables_built
+              << " week_entries_built=" << week_entries_built
+              << " returned_partitions=" << return_partitions
+              << " last_day_tables_before=" << last_day_tables_before
+              << " last_day_entries_before=" << last_day_entries_before
+              << " last_day_tables_after=" << last_day_tables_after
+              << " last_day_entries_after=" << last_day_entries_after
+              << " last_week_tables_before=" << last_week_tables_before
+              << " last_week_entries_before=" << last_week_entries_before
+              << " last_week_tables_after=" << last_week_tables_after
+              << " last_week_entries_after=" << last_week_entries_after
+              << " process_mem_before_mb=" << bytes_to_mb(process_mem_before)
+              << " process_mem_after_mb=" << bytes_to_mb(process_mem_after);
 }
 
 void HotspotCounter::make_dot_point() {
-    uint64_t value = cur_counter.load();
-    cur_counter = 0;
+    uint64_t value = cur_counter.exchange(0, std::memory_order_acq_rel);
     if (history_counters.size() == week_counters_size) {
         uint64_t week_counter_remove = history_counters.back();
         uint64_t day_counter_remove = history_counters[day_counters_size - 1];
@@ -184,6 +316,103 @@ uint64_t HotspotCounter::qpw() {
     return week_history_counter + cur_counter.load();
 }
 
+bool TabletHotspot::is_gc_eligible(const HotspotCounter& counter, 
SystemTimePoint now) {
+    const auto week_window = 
std::chrono::seconds((HotspotCounter::week_counters_size + 1) *
+                                                  
HotspotCounter::time_interval);
+    return counter.last_access_time < now - week_window &&
+           counter.cur_counter.load(std::memory_order_relaxed) == 0 &&
+           counter.day_history_counter.load(std::memory_order_relaxed) == 0 &&
+           counter.week_history_counter.load(std::memory_order_relaxed) == 0;
+}
+
+TabletHotspot::MaintenanceStats 
TabletHotspot::run_maintenance_once(SystemTimePoint now) {
+    MaintenanceStats stats;
+    auto gc_elapsed = SteadyClock::duration::zero();
+
+    for (size_t slot_idx = 0; slot_idx < s_slot_size; ++slot_idx) {
+        auto& slot = _tablets_hotspot[slot_idx];
+        std::vector<HotspotCounterPtr> counters;
+        size_t slot_size_before = 0;
+        size_t slot_bucket_count_before = 0;
+
+        {
+            std::lock_guard lock(slot.mtx);
+            slot_size_before = slot.map.size();
+            slot_bucket_count_before = slot.map.bucket_count();
+            stats.total_counters_before_gc += slot_size_before;
+            if (slot_size_before > 0) {
+                ++stats.non_empty_slots_before_gc;
+            }
+            stats.max_slot_size_before_gc =
+                    std::max<uint64_t>(stats.max_slot_size_before_gc, 
slot_size_before);
+            stats.sum_bucket_count_before_gc += slot_bucket_count_before;
+            counters.reserve(slot_size_before);
+            for (auto& [_, counter] : slot.map) {
+                counters.push_back(counter);
+            }
+        }
+        stats.copied_counters += counters.size();
+        std::for_each(counters.begin(), counters.end(),
+                      [](HotspotCounterPtr& counter) { 
counter->make_dot_point(); });
+
+        size_t erased_count = 0;
+        bool compacted = false;
+        size_t slot_size_after = 0;
+        size_t slot_bucket_count_after = 0;
+        auto gc_start = SteadyClock::now();
+        {
+            std::lock_guard lock(slot.mtx);
+            for (auto iter = slot.map.begin(); iter != slot.map.end();) {
+                if (is_gc_eligible(*iter->second, now)) {
+                    iter = slot.map.erase(iter);
+                    ++erased_count;
+                } else {
+                    ++iter;
+                }
+            }
+
+            if (should_compact_slot(slot_size_before, slot.map.size(), 
slot.map.bucket_count(),
+                                    erased_count)) {
+                decltype(slot.map) compacted_map;
+                compacted_map.max_load_factor(slot.map.max_load_factor());
+                compacted_map.reserve(slot.map.size());
+                for (const auto& [tablet_id, counter] : slot.map) {
+                    compacted_map.emplace(tablet_id, counter);
+                }
+                slot.map.swap(compacted_map);
+                compacted = true;
+            }
+
+            slot_size_after = slot.map.size();
+            slot_bucket_count_after = slot.map.bucket_count();
+            stats.total_counters_after_gc += slot_size_after;
+            if (slot_size_after > 0) {
+                ++stats.non_empty_slots_after_gc;
+            }
+            stats.max_slot_size_after_gc =
+                    std::max<uint64_t>(stats.max_slot_size_after_gc, 
slot_size_after);
+            stats.sum_bucket_count_after_gc += slot_bucket_count_after;
+        }
+        gc_elapsed += SteadyClock::now() - gc_start;
+
+        stats.evicted_counters += erased_count;
+        if (compacted) {
+            ++stats.compacted_slots;
+        }
+        if (erased_count > 0 || compacted) {
+            LOG(INFO) << "tablet_hotspot_diag gc_slot"
+                      << " slot_idx=" << slot_idx << " slot_size_before=" << 
slot_size_before
+                      << " slot_size_after=" << slot_size_after
+                      << " bucket_count_before=" << slot_bucket_count_before
+                      << " bucket_count_after=" << slot_bucket_count_after
+                      << " erased_count=" << erased_count << " compacted=" << 
compacted;
+        }
+    }
+
+    stats.gc_elapsed_ms = 
std::chrono::duration_cast<std::chrono::milliseconds>(gc_elapsed).count();
+    return stats;
+}
+
 void TabletHotspot::make_dot_point() {
     while (true) {
         {
@@ -194,17 +423,58 @@ void TabletHotspot::make_dot_point() {
                 break;
             }
         }
-        std::for_each(_tablets_hotspot.begin(), _tablets_hotspot.end(), 
[](HotspotMap& map) {
-            std::vector<HotspotCounterPtr> counters;
-            {
-                std::lock_guard lock(map.mtx);
-                for (auto& [_, counter] : map.map) {
-                    counters.push_back(counter);
-                }
-            }
-            std::for_each(counters.begin(), counters.end(),
-                          [](HotspotCounterPtr& counter) { 
counter->make_dot_point(); });
-        });
+
+        const uint64_t round =
+                _make_dot_point_round_total.fetch_add(1, 
std::memory_order_relaxed) + 1;
+        const uint64_t count_calls_total = 
_count_calls_total.load(std::memory_order_relaxed);
+        const uint64_t existing_hit_total = 
_existing_hit_total.load(std::memory_order_relaxed);
+        const uint64_t new_counter_total_before =
+                _new_counter_total.load(std::memory_order_relaxed);
+        const int64_t process_mem_before = process_memory_usage_for_diag();
+
+        const auto now = std::chrono::system_clock::now();
+        const MaintenanceStats stats = run_maintenance_once(now);
+
+        const int64_t process_mem_after = process_memory_usage_for_diag();
+        const uint64_t new_counter_total_after = 
_new_counter_total.load(std::memory_order_relaxed);
+        const uint64_t prev_round_new_counter_total = 
_last_round_new_counter_total.exchange(
+                new_counter_total_after, std::memory_order_relaxed);
+        const uint64_t new_counter_delta =
+                new_counter_total_after >= prev_round_new_counter_total
+                        ? (new_counter_total_after - 
prev_round_new_counter_total)
+                        : 0;
+
+        LOG(INFO) << "tablet_hotspot_diag make_dot_point round=" << round
+                  << " total_counters=" << stats.total_counters_before_gc
+                  << " total_counters_before_gc=" << 
stats.total_counters_before_gc
+                  << " total_counters_after_gc=" << 
stats.total_counters_after_gc
+                  << " non_empty_slots=" << stats.non_empty_slots_before_gc
+                  << " non_empty_slots_before_gc=" << 
stats.non_empty_slots_before_gc
+                  << " non_empty_slots_after_gc=" << 
stats.non_empty_slots_after_gc
+                  << " max_slot_size=" << stats.max_slot_size_before_gc
+                  << " max_slot_size_before_gc=" << 
stats.max_slot_size_before_gc
+                  << " max_slot_size_after_gc=" << stats.max_slot_size_after_gc
+                  << " sum_bucket_count=" << stats.sum_bucket_count_before_gc
+                  << " sum_bucket_count_before_gc=" << 
stats.sum_bucket_count_before_gc
+                  << " sum_bucket_count_after_gc=" << 
stats.sum_bucket_count_after_gc
+                  << " copied_counters=" << stats.copied_counters
+                  << " evicted_counters=" << stats.evicted_counters << " 
evicted_ratio="
+                  << ratio_or_zero(stats.evicted_counters, 
stats.total_counters_before_gc)
+                  << " compacted_slots=" << stats.compacted_slots << " 
bucket_reclaim_ratio="
+                  << ratio_or_zero(
+                             stats.sum_bucket_count_before_gc >= 
stats.sum_bucket_count_after_gc
+                                     ? (stats.sum_bucket_count_before_gc -
+                                        stats.sum_bucket_count_after_gc)
+                                     : 0,
+                             stats.sum_bucket_count_before_gc)
+                  << " gc_elapsed_ms=" << stats.gc_elapsed_ms
+                  << " count_calls_total=" << count_calls_total
+                  << " existing_hit_total=" << existing_hit_total
+                  << " new_counter_total_before=" << new_counter_total_before
+                  << " new_counter_total_after=" << new_counter_total_after
+                  << " new_counter_delta=" << new_counter_delta
+                  << " process_mem_before_mb=" << 
bytes_to_mb(process_mem_before)
+                  << " process_mem_after_mb=" << 
bytes_to_mb(process_mem_after);
     }
 }
 
diff --git a/be/src/cloud/cloud_tablet_hotspot.h 
b/be/src/cloud/cloud_tablet_hotspot.h
index 05acbcda5c7..ad1f645c050 100644
--- a/be/src/cloud/cloud_tablet_hotspot.h
+++ b/be/src/cloud/cloud_tablet_hotspot.h
@@ -17,14 +17,20 @@
 
 #pragma once
 
-#include <gen_cpp/BackendService.h>
-
+#include <array>
 #include <atomic>
 #include <chrono>
 #include <condition_variable>
+#include <cstdint>
+#include <deque>
 #include <memory>
+#include <mutex>
+#include <thread>
+#include <unordered_map>
+#include <vector>
 
-#include "storage/tablet/tablet.h"
+#include "gen_cpp/BackendService.h"
+#include "storage/tablet/tablet_fwd.h"
 
 namespace doris {
 
@@ -66,14 +72,33 @@ struct MapKeyHash {
 
 class TabletHotspot {
 public:
-    TabletHotspot();
+    struct MaintenanceStats {
+        uint64_t total_counters_before_gc = 0;
+        uint64_t total_counters_after_gc = 0;
+        uint64_t non_empty_slots_before_gc = 0;
+        uint64_t non_empty_slots_after_gc = 0;
+        uint64_t max_slot_size_before_gc = 0;
+        uint64_t max_slot_size_after_gc = 0;
+        uint64_t sum_bucket_count_before_gc = 0;
+        uint64_t sum_bucket_count_after_gc = 0;
+        uint64_t copied_counters = 0;
+        uint64_t evicted_counters = 0;
+        uint64_t compacted_slots = 0;
+        uint64_t gc_elapsed_ms = 0;
+    };
+
+    explicit TabletHotspot(bool start_counter_thread = true);
     ~TabletHotspot();
     // When query the tablet, count it
     void count(const BaseTablet& tablet);
     void get_top_n_hot_partition(std::vector<THotTableMessage>* hot_tables);
 
 private:
+    void count(int64_t tablet_id, int64_t table_id, int64_t index_id, int64_t 
partition_id);
     void make_dot_point();
+    MaintenanceStats 
run_maintenance_once(std::chrono::system_clock::time_point now);
+    static bool is_gc_eligible(const HotspotCounter& counter,
+                               std::chrono::system_clock::time_point now);
 
     struct HotspotMap {
         std::mutex mtx;
@@ -93,6 +118,12 @@ private:
     std::unordered_map<TabletHotspotMapKey, std::unordered_map<int64_t, 
TabletHotspotMapValue>,
                        MapKeyHash>
             _last_week_hot_partitions;
+    std::atomic_uint64_t _count_calls_total {0};
+    std::atomic_uint64_t _existing_hit_total {0};
+    std::atomic_uint64_t _new_counter_total {0};
+    std::atomic_uint64_t _last_round_new_counter_total {0};
+    std::atomic_uint64_t _get_top_n_hot_partition_call_total {0};
+    std::atomic_uint64_t _make_dot_point_round_total {0};
 };
 
 } // namespace doris
diff --git a/be/test/cloud/cloud_tablet_hotspot_gc_test.cpp 
b/be/test/cloud/cloud_tablet_hotspot_gc_test.cpp
new file mode 100644
index 00000000000..d196fe9942d
--- /dev/null
+++ b/be/test/cloud/cloud_tablet_hotspot_gc_test.cpp
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <chrono>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "cloud/cloud_tablet_hotspot.h"
+
+namespace doris {
+namespace {
+
+using SystemTimePoint = std::chrono::system_clock::time_point;
+using PartitionSnapshot =
+        std::map<std::tuple<int64_t, int64_t, int64_t>, std::pair<uint64_t, 
uint64_t>>;
+
+size_t slot_idx(int64_t tablet_id) {
+    return tablet_id % TabletHotspot::s_slot_size;
+}
+
+HotspotCounterPtr insert_counter(TabletHotspot* hotspot, int64_t tablet_id, 
int64_t table_id,
+                                 int64_t index_id, int64_t partition_id,
+                                 SystemTimePoint last_access_time, uint64_t 
cur_counter,
+                                 uint64_t day_history_counter, uint64_t 
week_history_counter) {
+    auto counter = std::make_shared<HotspotCounter>(table_id, index_id, 
partition_id);
+    counter->last_access_time = last_access_time;
+    counter->cur_counter.store(cur_counter, std::memory_order_relaxed);
+    counter->day_history_counter.store(day_history_counter, 
std::memory_order_relaxed);
+    counter->week_history_counter.store(week_history_counter, 
std::memory_order_relaxed);
+
+    auto& slot = hotspot->_tablets_hotspot[slot_idx(tablet_id)];
+    std::lock_guard lock(slot.mtx);
+    slot.map[tablet_id] = counter;
+    return counter;
+}
+
+PartitionSnapshot export_snapshot(TabletHotspot* hotspot) {
+    std::vector<THotTableMessage> hot_tables;
+    hotspot->get_top_n_hot_partition(&hot_tables);
+
+    PartitionSnapshot snapshot;
+    for (const auto& table_msg : hot_tables) {
+        for (const auto& partition : table_msg.hot_partitions) {
+            snapshot[std::make_tuple(table_msg.table_id, table_msg.index_id,
+                                     partition.partition_id)] =
+                    std::make_pair(partition.query_per_day, 
partition.query_per_week);
+        }
+    }
+    return snapshot;
+}
+
+} // namespace
+
+TEST(TabletHotspotGcTest, GcEligibilityRequiresExpiredAndZeroContribution) {
+    const auto now = std::chrono::system_clock::now();
+
+    HotspotCounter expired_zero_counter(1, 2, 3);
+    expired_zero_counter.last_access_time = now - std::chrono::hours(24 * 8);
+
+    HotspotCounter recent_zero_counter(1, 2, 3);
+    recent_zero_counter.last_access_time = now - std::chrono::hours(24 * 6);
+
+    HotspotCounter expired_week_counter(1, 2, 3);
+    expired_week_counter.last_access_time = now - std::chrono::hours(24 * 8);
+    expired_week_counter.week_history_counter.store(1, 
std::memory_order_relaxed);
+
+    EXPECT_TRUE(TabletHotspot::is_gc_eligible(expired_zero_counter, now));
+    EXPECT_FALSE(TabletHotspot::is_gc_eligible(recent_zero_counter, now));
+    EXPECT_FALSE(TabletHotspot::is_gc_eligible(expired_week_counter, now));
+}
+
+TEST(TabletHotspotGcTest, RunMaintenanceEvictsExpiredZeroCounter) {
+    TabletHotspot hotspot(false);
+    const auto now = std::chrono::system_clock::now();
+    const int64_t tablet_id = 1001;
+
+    insert_counter(&hotspot, tablet_id, 11, 12, 13, now - 
std::chrono::hours(24 * 8), 0, 0, 0);
+
+    const auto stats = hotspot.run_maintenance_once(now);
+    auto& slot = hotspot._tablets_hotspot[slot_idx(tablet_id)];
+
+    std::lock_guard lock(slot.mtx);
+    EXPECT_EQ(1u, stats.total_counters_before_gc);
+    EXPECT_EQ(0u, stats.total_counters_after_gc);
+    EXPECT_EQ(1u, stats.evicted_counters);
+    EXPECT_EQ(slot.map.end(), slot.map.find(tablet_id));
+}
+
+TEST(TabletHotspotGcTest, RunMaintenanceKeepsRecentZeroCounter) {
+    TabletHotspot hotspot(false);
+    const auto now = std::chrono::system_clock::now();
+    const int64_t tablet_id = 1002;
+
+    insert_counter(&hotspot, tablet_id, 21, 22, 23, now - 
std::chrono::hours(24 * 6), 0, 0, 0);
+
+    const auto stats = hotspot.run_maintenance_once(now);
+    auto& slot = hotspot._tablets_hotspot[slot_idx(tablet_id)];
+
+    std::lock_guard lock(slot.mtx);
+    EXPECT_EQ(1u, stats.total_counters_before_gc);
+    EXPECT_EQ(1u, stats.total_counters_after_gc);
+    EXPECT_EQ(0u, stats.evicted_counters);
+    ASSERT_NE(slot.map.end(), slot.map.find(tablet_id));
+}
+
+TEST(TabletHotspotGcTest, RunMaintenanceRemovesColdCounterAndCountRecreatesIt) 
{
+    TabletHotspot hotspot(false);
+    const auto now = std::chrono::system_clock::now();
+    const int64_t tablet_id = 1003;
+
+    insert_counter(&hotspot, tablet_id, 31, 32, 33, now - 
std::chrono::hours(24 * 8), 0, 0, 0);
+
+    const auto stats = hotspot.run_maintenance_once(now);
+    EXPECT_EQ(1u, stats.evicted_counters);
+
+    hotspot.count(tablet_id, 31, 32, 33);
+
+    auto& slot = hotspot._tablets_hotspot[slot_idx(tablet_id)];
+    std::lock_guard lock(slot.mtx);
+    auto iter = slot.map.find(tablet_id);
+    ASSERT_NE(slot.map.end(), iter);
+    EXPECT_EQ(31, iter->second->table_id);
+    EXPECT_EQ(32, iter->second->index_id);
+    EXPECT_EQ(33, iter->second->partition_id);
+    EXPECT_EQ(1u, iter->second->cur_counter.load(std::memory_order_relaxed));
+}
+
+TEST(TabletHotspotGcTest, RunMaintenanceCompactsSparseShard) {
+    TabletHotspot hotspot(false);
+    const auto now = std::chrono::system_clock::now();
+    const int64_t slot_seed = 17;
+
+    for (int i = 0; i < 256; ++i) {
+        const int64_t tablet_id = slot_seed + static_cast<int64_t>(i) * 
TabletHotspot::s_slot_size;
+        insert_counter(&hotspot, tablet_id, 41, 42, 43, now - 
std::chrono::hours(24 * 8), 0, 0, 0);
+    }
+
+    auto& slot = hotspot._tablets_hotspot[slot_idx(slot_seed)];
+    size_t bucket_count_before = 0;
+    {
+        std::lock_guard lock(slot.mtx);
+        bucket_count_before = slot.map.bucket_count();
+        ASSERT_GE(slot.map.size(), 256u);
+    }
+
+    const auto stats = hotspot.run_maintenance_once(now);
+
+    std::lock_guard lock(slot.mtx);
+    EXPECT_EQ(256u, stats.evicted_counters);
+    EXPECT_EQ(1u, stats.compacted_slots);
+    EXPECT_TRUE(slot.map.empty());
+    EXPECT_LT(slot.map.bucket_count(), bucket_count_before);
+}
+
+TEST(TabletHotspotGcTest, GcKeepsHotspotExportStable) {
+    const auto now = std::chrono::system_clock::now();
+
+    TabletHotspot baseline(false);
+    insert_counter(&baseline, 2001, 51, 52, 53, now - std::chrono::hours(1), 
0, 7, 11);
+    insert_counter(&baseline, 2002, 51, 52, 54, now - std::chrono::hours(24 * 
8), 0, 0, 0);
+
+    TabletHotspot with_gc(false);
+    insert_counter(&with_gc, 2001, 51, 52, 53, now - std::chrono::hours(1), 0, 
7, 11);
+    insert_counter(&with_gc, 2002, 51, 52, 54, now - std::chrono::hours(24 * 
8), 0, 0, 0);
+
+    const PartitionSnapshot before_gc = export_snapshot(&baseline);
+    const auto stats = with_gc.run_maintenance_once(now);
+    EXPECT_EQ(1u, stats.evicted_counters);
+    const PartitionSnapshot after_gc = export_snapshot(&with_gc);
+
+    EXPECT_EQ(before_gc, after_gc);
+    ASSERT_EQ(1u, after_gc.size());
+    const auto iter = after_gc.find(std::make_tuple(51, 52, 53));
+    ASSERT_NE(after_gc.end(), iter);
+    EXPECT_EQ(7u, iter->second.first);
+    EXPECT_EQ(11u, iter->second.second);
+}
+
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to