This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new d2a45c2ccb8 branch-4.1: [fix](filecache) reclaim expired tablet
hotspot counters and compact sparse shards (#61834)
d2a45c2ccb8 is described below
commit d2a45c2ccb8e5683d59c4a3095a76aadcf982ef6
Author: zhengyu <[email protected]>
AuthorDate: Sat Mar 28 22:29:12 2026 +0800
branch-4.1: [fix](filecache) reclaim expired tablet hotspot counters and
compact sparse shards (#61834)
TabletHotspot memory kept growing by about 1.8-2.7 GB per day because
counters for cold tablets remained resident even after they no longer
contributed to the hour/day/week hotspot windows. Their history deques
kept consuming memory, and the sharded unordered_maps continued to hold
expanded bucket arrays without a reclamation path.
Fix this by running a maintenance pass after each hourly rollup to evict
counters whose last access is older than the one-week window and whose
current/day/week contributions are all zero. When a shard becomes sparse
after eviction, rebuild the map to release bucket memory. Also add
GC-focused diagnostics and unit tests to verify eviction, recreation,
and shard compaction behavior.
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason. -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add the documentation PR link here, e.g.
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For the reviewer who merges this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add the branch pick label for each branch
this PR should be merged into -->
---
be/src/cloud/cloud_tablet_hotspot.cpp | 342 ++++++++++++++++++++++---
be/src/cloud/cloud_tablet_hotspot.h | 39 ++-
be/test/cloud/cloud_tablet_hotspot_gc_test.cpp | 198 ++++++++++++++
3 files changed, 539 insertions(+), 40 deletions(-)
diff --git a/be/src/cloud/cloud_tablet_hotspot.cpp
b/be/src/cloud/cloud_tablet_hotspot.cpp
index e6129669690..27c64eeaeaf 100644
--- a/be/src/cloud/cloud_tablet_hotspot.cpp
+++ b/be/src/cloud/cloud_tablet_hotspot.cpp
@@ -17,33 +17,118 @@
#include "cloud/cloud_tablet_hotspot.h"
+#include <algorithm>
#include <chrono>
+#include <cmath>
+#include <cstdint>
#include <mutex>
+#include <vector>
-#include "cloud/config.h"
-#include "runtime/exec_env.h"
-#include "storage/tablet/tablet_fwd.h"
+#include "runtime/memory/global_memory_arbitrator.h"
+#include "storage/tablet/base_tablet.h"
namespace doris {
+namespace {
+
+using HotPartitionSnapshot =
+ std::unordered_map<TabletHotspotMapKey, std::unordered_map<int64_t,
TabletHotspotMapValue>,
+ MapKeyHash>;
+
+constexpr uint64_t kCountHeartbeatInterval = 500000;
+constexpr uint64_t kNewCounterLogInterval = 4096;
+constexpr size_t kCompactBucketMultiplier = 4;
+constexpr size_t kCompactEraseRatioDivisor = 4;
+
+using SystemTimePoint = std::chrono::system_clock::time_point;
+using SteadyClock = std::chrono::steady_clock;
+
+double bytes_to_mb(int64_t bytes) {
+ return static_cast<double>(bytes) / (1024.0 * 1024.0);
+}
+
+double ratio_or_zero(uint64_t numerator, uint64_t denominator) {
+ if (denominator == 0) {
+ return 0.0;
+ }
+ return static_cast<double>(numerator) / static_cast<double>(denominator);
+}
+
+int64_t process_memory_usage_for_diag() {
+#ifdef BE_TEST
+ return 0;
+#else
+ return GlobalMemoryArbitrator::process_memory_usage();
+#endif
+}
+
+uint64_t count_hot_partition_entries(const HotPartitionSnapshot& snapshot) {
+ uint64_t entries = 0;
+ for (const auto& [_, partition_to_value] : snapshot) {
+ entries += partition_to_value.size();
+ }
+ return entries;
+}
+
+bool should_compact_slot(size_t slot_size_before, size_t slot_size_after,
size_t bucket_count_after,
+ size_t erased_count) {
+ if (erased_count == 0) {
+ return false;
+ }
+ if (slot_size_after == 0) {
+ return true;
+ }
+ return bucket_count_after > (kCompactBucketMultiplier * slot_size_after) ||
+ (slot_size_before > 0 && erased_count * kCompactEraseRatioDivisor
>= slot_size_before);
+}
+
+} // namespace
+
void TabletHotspot::count(const BaseTablet& tablet) {
- size_t slot_idx = tablet.tablet_id() % s_slot_size;
+ count(tablet.tablet_id(), tablet.table_id(), tablet.index_id(),
tablet.partition_id());
+}
+
+void TabletHotspot::count(int64_t tablet_id, int64_t table_id, int64_t
index_id,
+ int64_t partition_id) {
+ const uint64_t count_calls_total =
+ _count_calls_total.fetch_add(1, std::memory_order_relaxed) + 1;
+
+ size_t slot_idx = tablet_id % s_slot_size;
auto& slot = _tablets_hotspot[slot_idx];
- std::lock_guard lock(slot.mtx);
- HotspotCounterPtr counter;
- if (auto iter = slot.map.find(tablet.tablet_id()); iter == slot.map.end())
{
- counter = std::make_shared<HotspotCounter>(tablet.table_id(),
tablet.index_id(),
- tablet.partition_id());
- slot.map.insert(std::make_pair(tablet.tablet_id(), counter));
- } else {
- counter = iter->second;
+ bool should_log_new_counter = false;
+ uint64_t new_counter_total = 0;
+ {
+ std::lock_guard lock(slot.mtx);
+ HotspotCounterPtr counter;
+ if (auto iter = slot.map.find(tablet_id); iter == slot.map.end()) {
+ counter = std::make_shared<HotspotCounter>(table_id, index_id,
partition_id);
+ slot.map.insert(std::make_pair(tablet_id, counter));
+ new_counter_total = _new_counter_total.fetch_add(1,
std::memory_order_relaxed) + 1;
+ should_log_new_counter = (new_counter_total %
kNewCounterLogInterval == 0);
+ } else {
+ counter = iter->second;
+ _existing_hit_total.fetch_add(1, std::memory_order_relaxed);
+ }
+ counter->last_access_time = std::chrono::system_clock::now();
+ counter->cur_counter.fetch_add(1, std::memory_order_relaxed);
+ }
+
+ if (should_log_new_counter) {
+ LOG(INFO) << "tablet_hotspot_diag new_counter_total=" <<
new_counter_total
+ << " count_calls_total=" << count_calls_total
+ << " existing_hit_total=" <<
_existing_hit_total.load(std::memory_order_relaxed);
+ }
+ if (count_calls_total % kCountHeartbeatInterval == 0) {
+ LOG(INFO) << "tablet_hotspot_diag count_heartbeat count_calls_total="
<< count_calls_total
+ << " existing_hit_total=" <<
_existing_hit_total.load(std::memory_order_relaxed)
+ << " new_counter_total=" <<
_new_counter_total.load(std::memory_order_relaxed);
}
- counter->last_access_time = std::chrono::system_clock::now();
- counter->cur_counter++;
}
-TabletHotspot::TabletHotspot() {
- _counter_thread = std::thread(&TabletHotspot::make_dot_point, this);
+TabletHotspot::TabletHotspot(bool start_counter_thread) {
+ if (start_counter_thread) {
+ _counter_thread = std::thread(&TabletHotspot::make_dot_point, this);
+ }
}
TabletHotspot::~TabletHotspot() {
@@ -108,6 +193,21 @@ void get_return_partitions(
}
void TabletHotspot::get_top_n_hot_partition(std::vector<THotTableMessage>*
hot_tables) {
+ const uint64_t call_id =
+ _get_top_n_hot_partition_call_total.fetch_add(1,
std::memory_order_relaxed) + 1;
+ uint64_t last_day_tables_before = 0;
+ uint64_t last_day_entries_before = 0;
+ uint64_t last_week_tables_before = 0;
+ uint64_t last_week_entries_before = 0;
+ {
+ std::lock_guard lock(_last_partitions_mtx);
+ last_day_tables_before = _last_day_hot_partitions.size();
+ last_day_entries_before =
count_hot_partition_entries(_last_day_hot_partitions);
+ last_week_tables_before = _last_week_hot_partitions.size();
+ last_week_entries_before =
count_hot_partition_entries(_last_week_hot_partitions);
+ }
+ const int64_t process_mem_before = process_memory_usage_for_diag();
+
// map<pair<table_id, index_id>, map<partition_id, value>> for day
std::unordered_map<TabletHotspotMapKey, std::unordered_map<int64_t,
TabletHotspotMapValue>,
MapKeyHash>
@@ -145,20 +245,52 @@ void
TabletHotspot::get_top_n_hot_partition(std::vector<THotTableMessage>* hot_t
});
constexpr int N = 50;
int return_partitions = 0;
+ const uint64_t day_tables_built = day_hot_partitions.size();
+ const uint64_t day_entries_built =
count_hot_partition_entries(day_hot_partitions);
+ const uint64_t week_tables_built = week_hot_partitions.size();
+ const uint64_t week_entries_built =
count_hot_partition_entries(week_hot_partitions);
+ uint64_t last_day_tables_after = 0;
+ uint64_t last_day_entries_after = 0;
+ uint64_t last_week_tables_after = 0;
+ uint64_t last_week_entries_after = 0;
- std::unique_lock lock(_last_partitions_mtx);
- get_return_partitions(day_hot_partitions, _last_day_hot_partitions,
hot_tables,
- return_partitions, N);
- get_return_partitions(week_hot_partitions, _last_week_hot_partitions,
hot_tables,
- return_partitions, N);
+ {
+ std::unique_lock lock(_last_partitions_mtx);
+ get_return_partitions(day_hot_partitions, _last_day_hot_partitions,
hot_tables,
+ return_partitions, N);
+ get_return_partitions(week_hot_partitions, _last_week_hot_partitions,
hot_tables,
+ return_partitions, N);
- _last_day_hot_partitions = std::move(day_hot_partitions);
- _last_week_hot_partitions = std::move(week_hot_partitions);
+ _last_day_hot_partitions = std::move(day_hot_partitions);
+ _last_week_hot_partitions = std::move(week_hot_partitions);
+ last_day_tables_after = _last_day_hot_partitions.size();
+ last_day_entries_after =
count_hot_partition_entries(_last_day_hot_partitions);
+ last_week_tables_after = _last_week_hot_partitions.size();
+ last_week_entries_after =
count_hot_partition_entries(_last_week_hot_partitions);
+ }
+
+ const int64_t process_mem_after = process_memory_usage_for_diag();
+
+ LOG(INFO) << "tablet_hotspot_diag get_top_n_hot_partition call_id=" <<
call_id
+ << " day_tables_built=" << day_tables_built
+ << " day_entries_built=" << day_entries_built
+ << " week_tables_built=" << week_tables_built
+ << " week_entries_built=" << week_entries_built
+ << " returned_partitions=" << return_partitions
+ << " last_day_tables_before=" << last_day_tables_before
+ << " last_day_entries_before=" << last_day_entries_before
+ << " last_day_tables_after=" << last_day_tables_after
+ << " last_day_entries_after=" << last_day_entries_after
+ << " last_week_tables_before=" << last_week_tables_before
+ << " last_week_entries_before=" << last_week_entries_before
+ << " last_week_tables_after=" << last_week_tables_after
+ << " last_week_entries_after=" << last_week_entries_after
+ << " process_mem_before_mb=" << bytes_to_mb(process_mem_before)
+ << " process_mem_after_mb=" << bytes_to_mb(process_mem_after);
}
void HotspotCounter::make_dot_point() {
- uint64_t value = cur_counter.load();
- cur_counter = 0;
+ uint64_t value = cur_counter.exchange(0, std::memory_order_acq_rel);
if (history_counters.size() == week_counters_size) {
uint64_t week_counter_remove = history_counters.back();
uint64_t day_counter_remove = history_counters[day_counters_size - 1];
@@ -184,6 +316,103 @@ uint64_t HotspotCounter::qpw() {
return week_history_counter + cur_counter.load();
}
+bool TabletHotspot::is_gc_eligible(const HotspotCounter& counter,
SystemTimePoint now) {
+ const auto week_window =
std::chrono::seconds((HotspotCounter::week_counters_size + 1) *
+
HotspotCounter::time_interval);
+ return counter.last_access_time < now - week_window &&
+ counter.cur_counter.load(std::memory_order_relaxed) == 0 &&
+ counter.day_history_counter.load(std::memory_order_relaxed) == 0 &&
+ counter.week_history_counter.load(std::memory_order_relaxed) == 0;
+}
+
+TabletHotspot::MaintenanceStats
TabletHotspot::run_maintenance_once(SystemTimePoint now) {
+ MaintenanceStats stats;
+ auto gc_elapsed = SteadyClock::duration::zero();
+
+ for (size_t slot_idx = 0; slot_idx < s_slot_size; ++slot_idx) {
+ auto& slot = _tablets_hotspot[slot_idx];
+ std::vector<HotspotCounterPtr> counters;
+ size_t slot_size_before = 0;
+ size_t slot_bucket_count_before = 0;
+
+ {
+ std::lock_guard lock(slot.mtx);
+ slot_size_before = slot.map.size();
+ slot_bucket_count_before = slot.map.bucket_count();
+ stats.total_counters_before_gc += slot_size_before;
+ if (slot_size_before > 0) {
+ ++stats.non_empty_slots_before_gc;
+ }
+ stats.max_slot_size_before_gc =
+ std::max<uint64_t>(stats.max_slot_size_before_gc,
slot_size_before);
+ stats.sum_bucket_count_before_gc += slot_bucket_count_before;
+ counters.reserve(slot_size_before);
+ for (auto& [_, counter] : slot.map) {
+ counters.push_back(counter);
+ }
+ }
+ stats.copied_counters += counters.size();
+ std::for_each(counters.begin(), counters.end(),
+ [](HotspotCounterPtr& counter) {
counter->make_dot_point(); });
+
+ size_t erased_count = 0;
+ bool compacted = false;
+ size_t slot_size_after = 0;
+ size_t slot_bucket_count_after = 0;
+ auto gc_start = SteadyClock::now();
+ {
+ std::lock_guard lock(slot.mtx);
+ for (auto iter = slot.map.begin(); iter != slot.map.end();) {
+ if (is_gc_eligible(*iter->second, now)) {
+ iter = slot.map.erase(iter);
+ ++erased_count;
+ } else {
+ ++iter;
+ }
+ }
+
+ if (should_compact_slot(slot_size_before, slot.map.size(),
slot.map.bucket_count(),
+ erased_count)) {
+ decltype(slot.map) compacted_map;
+ compacted_map.max_load_factor(slot.map.max_load_factor());
+ compacted_map.reserve(slot.map.size());
+ for (const auto& [tablet_id, counter] : slot.map) {
+ compacted_map.emplace(tablet_id, counter);
+ }
+ slot.map.swap(compacted_map);
+ compacted = true;
+ }
+
+ slot_size_after = slot.map.size();
+ slot_bucket_count_after = slot.map.bucket_count();
+ stats.total_counters_after_gc += slot_size_after;
+ if (slot_size_after > 0) {
+ ++stats.non_empty_slots_after_gc;
+ }
+ stats.max_slot_size_after_gc =
+ std::max<uint64_t>(stats.max_slot_size_after_gc,
slot_size_after);
+ stats.sum_bucket_count_after_gc += slot_bucket_count_after;
+ }
+ gc_elapsed += SteadyClock::now() - gc_start;
+
+ stats.evicted_counters += erased_count;
+ if (compacted) {
+ ++stats.compacted_slots;
+ }
+ if (erased_count > 0 || compacted) {
+ LOG(INFO) << "tablet_hotspot_diag gc_slot"
+ << " slot_idx=" << slot_idx << " slot_size_before=" <<
slot_size_before
+ << " slot_size_after=" << slot_size_after
+ << " bucket_count_before=" << slot_bucket_count_before
+ << " bucket_count_after=" << slot_bucket_count_after
+ << " erased_count=" << erased_count << " compacted=" <<
compacted;
+ }
+ }
+
+ stats.gc_elapsed_ms =
std::chrono::duration_cast<std::chrono::milliseconds>(gc_elapsed).count();
+ return stats;
+}
+
void TabletHotspot::make_dot_point() {
while (true) {
{
@@ -194,17 +423,58 @@ void TabletHotspot::make_dot_point() {
break;
}
}
- std::for_each(_tablets_hotspot.begin(), _tablets_hotspot.end(),
[](HotspotMap& map) {
- std::vector<HotspotCounterPtr> counters;
- {
- std::lock_guard lock(map.mtx);
- for (auto& [_, counter] : map.map) {
- counters.push_back(counter);
- }
- }
- std::for_each(counters.begin(), counters.end(),
- [](HotspotCounterPtr& counter) {
counter->make_dot_point(); });
- });
+
+ const uint64_t round =
+ _make_dot_point_round_total.fetch_add(1,
std::memory_order_relaxed) + 1;
+ const uint64_t count_calls_total =
_count_calls_total.load(std::memory_order_relaxed);
+ const uint64_t existing_hit_total =
_existing_hit_total.load(std::memory_order_relaxed);
+ const uint64_t new_counter_total_before =
+ _new_counter_total.load(std::memory_order_relaxed);
+ const int64_t process_mem_before = process_memory_usage_for_diag();
+
+ const auto now = std::chrono::system_clock::now();
+ const MaintenanceStats stats = run_maintenance_once(now);
+
+ const int64_t process_mem_after = process_memory_usage_for_diag();
+ const uint64_t new_counter_total_after =
_new_counter_total.load(std::memory_order_relaxed);
+ const uint64_t prev_round_new_counter_total =
_last_round_new_counter_total.exchange(
+ new_counter_total_after, std::memory_order_relaxed);
+ const uint64_t new_counter_delta =
+ new_counter_total_after >= prev_round_new_counter_total
+ ? (new_counter_total_after -
prev_round_new_counter_total)
+ : 0;
+
+ LOG(INFO) << "tablet_hotspot_diag make_dot_point round=" << round
+ << " total_counters=" << stats.total_counters_before_gc
+ << " total_counters_before_gc=" <<
stats.total_counters_before_gc
+ << " total_counters_after_gc=" <<
stats.total_counters_after_gc
+ << " non_empty_slots=" << stats.non_empty_slots_before_gc
+ << " non_empty_slots_before_gc=" <<
stats.non_empty_slots_before_gc
+ << " non_empty_slots_after_gc=" <<
stats.non_empty_slots_after_gc
+ << " max_slot_size=" << stats.max_slot_size_before_gc
+ << " max_slot_size_before_gc=" <<
stats.max_slot_size_before_gc
+ << " max_slot_size_after_gc=" << stats.max_slot_size_after_gc
+ << " sum_bucket_count=" << stats.sum_bucket_count_before_gc
+ << " sum_bucket_count_before_gc=" <<
stats.sum_bucket_count_before_gc
+ << " sum_bucket_count_after_gc=" <<
stats.sum_bucket_count_after_gc
+ << " copied_counters=" << stats.copied_counters
+ << " evicted_counters=" << stats.evicted_counters << "
evicted_ratio="
+ << ratio_or_zero(stats.evicted_counters,
stats.total_counters_before_gc)
+ << " compacted_slots=" << stats.compacted_slots << "
bucket_reclaim_ratio="
+ << ratio_or_zero(
+ stats.sum_bucket_count_before_gc >=
stats.sum_bucket_count_after_gc
+ ? (stats.sum_bucket_count_before_gc -
+ stats.sum_bucket_count_after_gc)
+ : 0,
+ stats.sum_bucket_count_before_gc)
+ << " gc_elapsed_ms=" << stats.gc_elapsed_ms
+ << " count_calls_total=" << count_calls_total
+ << " existing_hit_total=" << existing_hit_total
+ << " new_counter_total_before=" << new_counter_total_before
+ << " new_counter_total_after=" << new_counter_total_after
+ << " new_counter_delta=" << new_counter_delta
+ << " process_mem_before_mb=" <<
bytes_to_mb(process_mem_before)
+ << " process_mem_after_mb=" <<
bytes_to_mb(process_mem_after);
}
}
diff --git a/be/src/cloud/cloud_tablet_hotspot.h
b/be/src/cloud/cloud_tablet_hotspot.h
index 05acbcda5c7..ad1f645c050 100644
--- a/be/src/cloud/cloud_tablet_hotspot.h
+++ b/be/src/cloud/cloud_tablet_hotspot.h
@@ -17,14 +17,20 @@
#pragma once
-#include <gen_cpp/BackendService.h>
-
+#include <array>
#include <atomic>
#include <chrono>
#include <condition_variable>
+#include <cstdint>
+#include <deque>
#include <memory>
+#include <mutex>
+#include <thread>
+#include <unordered_map>
+#include <vector>
-#include "storage/tablet/tablet.h"
+#include "gen_cpp/BackendService.h"
+#include "storage/tablet/tablet_fwd.h"
namespace doris {
@@ -66,14 +72,33 @@ struct MapKeyHash {
class TabletHotspot {
public:
- TabletHotspot();
+ struct MaintenanceStats {
+ uint64_t total_counters_before_gc = 0;
+ uint64_t total_counters_after_gc = 0;
+ uint64_t non_empty_slots_before_gc = 0;
+ uint64_t non_empty_slots_after_gc = 0;
+ uint64_t max_slot_size_before_gc = 0;
+ uint64_t max_slot_size_after_gc = 0;
+ uint64_t sum_bucket_count_before_gc = 0;
+ uint64_t sum_bucket_count_after_gc = 0;
+ uint64_t copied_counters = 0;
+ uint64_t evicted_counters = 0;
+ uint64_t compacted_slots = 0;
+ uint64_t gc_elapsed_ms = 0;
+ };
+
+ explicit TabletHotspot(bool start_counter_thread = true);
~TabletHotspot();
// When query the tablet, count it
void count(const BaseTablet& tablet);
void get_top_n_hot_partition(std::vector<THotTableMessage>* hot_tables);
private:
+ void count(int64_t tablet_id, int64_t table_id, int64_t index_id, int64_t
partition_id);
void make_dot_point();
+ MaintenanceStats
run_maintenance_once(std::chrono::system_clock::time_point now);
+ static bool is_gc_eligible(const HotspotCounter& counter,
+ std::chrono::system_clock::time_point now);
struct HotspotMap {
std::mutex mtx;
@@ -93,6 +118,12 @@ private:
std::unordered_map<TabletHotspotMapKey, std::unordered_map<int64_t,
TabletHotspotMapValue>,
MapKeyHash>
_last_week_hot_partitions;
+ std::atomic_uint64_t _count_calls_total {0};
+ std::atomic_uint64_t _existing_hit_total {0};
+ std::atomic_uint64_t _new_counter_total {0};
+ std::atomic_uint64_t _last_round_new_counter_total {0};
+ std::atomic_uint64_t _get_top_n_hot_partition_call_total {0};
+ std::atomic_uint64_t _make_dot_point_round_total {0};
};
} // namespace doris
diff --git a/be/test/cloud/cloud_tablet_hotspot_gc_test.cpp
b/be/test/cloud/cloud_tablet_hotspot_gc_test.cpp
new file mode 100644
index 00000000000..d196fe9942d
--- /dev/null
+++ b/be/test/cloud/cloud_tablet_hotspot_gc_test.cpp
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <chrono>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+#include "cloud/cloud_tablet_hotspot.h"
+
+namespace doris {
+namespace {
+
+using SystemTimePoint = std::chrono::system_clock::time_point;
+using PartitionSnapshot =
+ std::map<std::tuple<int64_t, int64_t, int64_t>, std::pair<uint64_t,
uint64_t>>;
+
+size_t slot_idx(int64_t tablet_id) {
+ return tablet_id % TabletHotspot::s_slot_size;
+}
+
+HotspotCounterPtr insert_counter(TabletHotspot* hotspot, int64_t tablet_id,
int64_t table_id,
+ int64_t index_id, int64_t partition_id,
+ SystemTimePoint last_access_time, uint64_t
cur_counter,
+ uint64_t day_history_counter, uint64_t
week_history_counter) {
+ auto counter = std::make_shared<HotspotCounter>(table_id, index_id,
partition_id);
+ counter->last_access_time = last_access_time;
+ counter->cur_counter.store(cur_counter, std::memory_order_relaxed);
+ counter->day_history_counter.store(day_history_counter,
std::memory_order_relaxed);
+ counter->week_history_counter.store(week_history_counter,
std::memory_order_relaxed);
+
+ auto& slot = hotspot->_tablets_hotspot[slot_idx(tablet_id)];
+ std::lock_guard lock(slot.mtx);
+ slot.map[tablet_id] = counter;
+ return counter;
+}
+
+PartitionSnapshot export_snapshot(TabletHotspot* hotspot) {
+ std::vector<THotTableMessage> hot_tables;
+ hotspot->get_top_n_hot_partition(&hot_tables);
+
+ PartitionSnapshot snapshot;
+ for (const auto& table_msg : hot_tables) {
+ for (const auto& partition : table_msg.hot_partitions) {
+ snapshot[std::make_tuple(table_msg.table_id, table_msg.index_id,
+ partition.partition_id)] =
+ std::make_pair(partition.query_per_day,
partition.query_per_week);
+ }
+ }
+ return snapshot;
+}
+
+} // namespace
+
+TEST(TabletHotspotGcTest, GcEligibilityRequiresExpiredAndZeroContribution) {
+ const auto now = std::chrono::system_clock::now();
+
+ HotspotCounter expired_zero_counter(1, 2, 3);
+ expired_zero_counter.last_access_time = now - std::chrono::hours(24 * 8);
+
+ HotspotCounter recent_zero_counter(1, 2, 3);
+ recent_zero_counter.last_access_time = now - std::chrono::hours(24 * 6);
+
+ HotspotCounter expired_week_counter(1, 2, 3);
+ expired_week_counter.last_access_time = now - std::chrono::hours(24 * 8);
+ expired_week_counter.week_history_counter.store(1,
std::memory_order_relaxed);
+
+ EXPECT_TRUE(TabletHotspot::is_gc_eligible(expired_zero_counter, now));
+ EXPECT_FALSE(TabletHotspot::is_gc_eligible(recent_zero_counter, now));
+ EXPECT_FALSE(TabletHotspot::is_gc_eligible(expired_week_counter, now));
+}
+
+TEST(TabletHotspotGcTest, RunMaintenanceEvictsExpiredZeroCounter) {
+ TabletHotspot hotspot(false);
+ const auto now = std::chrono::system_clock::now();
+ const int64_t tablet_id = 1001;
+
+ insert_counter(&hotspot, tablet_id, 11, 12, 13, now -
std::chrono::hours(24 * 8), 0, 0, 0);
+
+ const auto stats = hotspot.run_maintenance_once(now);
+ auto& slot = hotspot._tablets_hotspot[slot_idx(tablet_id)];
+
+ std::lock_guard lock(slot.mtx);
+ EXPECT_EQ(1u, stats.total_counters_before_gc);
+ EXPECT_EQ(0u, stats.total_counters_after_gc);
+ EXPECT_EQ(1u, stats.evicted_counters);
+ EXPECT_EQ(slot.map.end(), slot.map.find(tablet_id));
+}
+
+TEST(TabletHotspotGcTest, RunMaintenanceKeepsRecentZeroCounter) {
+ TabletHotspot hotspot(false);
+ const auto now = std::chrono::system_clock::now();
+ const int64_t tablet_id = 1002;
+
+ insert_counter(&hotspot, tablet_id, 21, 22, 23, now -
std::chrono::hours(24 * 6), 0, 0, 0);
+
+ const auto stats = hotspot.run_maintenance_once(now);
+ auto& slot = hotspot._tablets_hotspot[slot_idx(tablet_id)];
+
+ std::lock_guard lock(slot.mtx);
+ EXPECT_EQ(1u, stats.total_counters_before_gc);
+ EXPECT_EQ(1u, stats.total_counters_after_gc);
+ EXPECT_EQ(0u, stats.evicted_counters);
+ ASSERT_NE(slot.map.end(), slot.map.find(tablet_id));
+}
+
+TEST(TabletHotspotGcTest, RunMaintenanceRemovesColdCounterAndCountRecreatesIt)
{
+ TabletHotspot hotspot(false);
+ const auto now = std::chrono::system_clock::now();
+ const int64_t tablet_id = 1003;
+
+ insert_counter(&hotspot, tablet_id, 31, 32, 33, now -
std::chrono::hours(24 * 8), 0, 0, 0);
+
+ const auto stats = hotspot.run_maintenance_once(now);
+ EXPECT_EQ(1u, stats.evicted_counters);
+
+ hotspot.count(tablet_id, 31, 32, 33);
+
+ auto& slot = hotspot._tablets_hotspot[slot_idx(tablet_id)];
+ std::lock_guard lock(slot.mtx);
+ auto iter = slot.map.find(tablet_id);
+ ASSERT_NE(slot.map.end(), iter);
+ EXPECT_EQ(31, iter->second->table_id);
+ EXPECT_EQ(32, iter->second->index_id);
+ EXPECT_EQ(33, iter->second->partition_id);
+ EXPECT_EQ(1u, iter->second->cur_counter.load(std::memory_order_relaxed));
+}
+
+TEST(TabletHotspotGcTest, RunMaintenanceCompactsSparseShard) {
+ TabletHotspot hotspot(false);
+ const auto now = std::chrono::system_clock::now();
+ const int64_t slot_seed = 17;
+
+ for (int i = 0; i < 256; ++i) {
+ const int64_t tablet_id = slot_seed + static_cast<int64_t>(i) *
TabletHotspot::s_slot_size;
+ insert_counter(&hotspot, tablet_id, 41, 42, 43, now -
std::chrono::hours(24 * 8), 0, 0, 0);
+ }
+
+ auto& slot = hotspot._tablets_hotspot[slot_idx(slot_seed)];
+ size_t bucket_count_before = 0;
+ {
+ std::lock_guard lock(slot.mtx);
+ bucket_count_before = slot.map.bucket_count();
+ ASSERT_GE(slot.map.size(), 256u);
+ }
+
+ const auto stats = hotspot.run_maintenance_once(now);
+
+ std::lock_guard lock(slot.mtx);
+ EXPECT_EQ(256u, stats.evicted_counters);
+ EXPECT_EQ(1u, stats.compacted_slots);
+ EXPECT_TRUE(slot.map.empty());
+ EXPECT_LT(slot.map.bucket_count(), bucket_count_before);
+}
+
+TEST(TabletHotspotGcTest, GcKeepsHotspotExportStable) {
+ const auto now = std::chrono::system_clock::now();
+
+ TabletHotspot baseline(false);
+ insert_counter(&baseline, 2001, 51, 52, 53, now - std::chrono::hours(1),
0, 7, 11);
+ insert_counter(&baseline, 2002, 51, 52, 54, now - std::chrono::hours(24 *
8), 0, 0, 0);
+
+ TabletHotspot with_gc(false);
+ insert_counter(&with_gc, 2001, 51, 52, 53, now - std::chrono::hours(1), 0,
7, 11);
+ insert_counter(&with_gc, 2002, 51, 52, 54, now - std::chrono::hours(24 *
8), 0, 0, 0);
+
+ const PartitionSnapshot before_gc = export_snapshot(&baseline);
+ const auto stats = with_gc.run_maintenance_once(now);
+ EXPECT_EQ(1u, stats.evicted_counters);
+ const PartitionSnapshot after_gc = export_snapshot(&with_gc);
+
+ EXPECT_EQ(before_gc, after_gc);
+ ASSERT_EQ(1u, after_gc.size());
+ const auto iter = after_gc.find(std::make_tuple(51, 52, 53));
+ ASSERT_NE(after_gc.end(), iter);
+ EXPECT_EQ(7u, iter->second.first);
+ EXPECT_EQ(11u, iter->second.second);
+}
+
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]