This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 4e6197ca60d [improvement](memory) Storage page cache use LRU-K cache, K=2 (#45719)
4e6197ca60d is described below

commit 4e6197ca60de0ea07e9cdf597dce68008bfad3f0
Author: Xinyi Zou <zouxi...@selectdb.com>
AuthorDate: Tue Dec 24 19:02:14 2024 +0800

    [improvement](memory) Storage page cache use LRU-K cache, K=2 (#45719)

    ### What problem does this PR solve?

    The storage page cache currently uses a plain LRU cache. With a plain LRU cache,
    occasional batch operations can cause "cache pollution": hotspot data is squeezed
    out of the cache by non-hotspot data, which reduces the cache hit rate. In the
    extreme case where the number of pages inserted in one batch is greater than the
    cache capacity, the cache hit rate drops to 0.

    Introducing an LRU-K cache (K=2) avoids this "cache pollution" in most cases.

    Previously submitted as #28794. Similar PR: https://github.com/apache/doris/pull/23546

    Performance test on branch-2.1, on a 1 FE / 3 BE (32 cores, 128 GB memory) Doris cluster:
    - `ssb_sf_1000` total query cost: 18.85s -> 14.44s, a 23% improvement
    - `ssb_flat_1000` total query cost: 12.21s -> 8.54s, a 30% improvement; cache hit rate 1% -> 40%

    (An illustrative sketch of the LRU-K admission rule follows the change summary below.)
---
 be/src/olap/lru_cache.cpp                | 104 +++++++++++++++++++++++------
 be/src/olap/lru_cache.h                  |  46 ++++++-------
 be/src/olap/page_cache.h                 |   3 +-
 be/src/runtime/memory/lru_cache_policy.h |   9 +--
 be/test/olap/lru_cache_test.cpp          | 108 +++++++++++++++++++++++++++++--
 be/test/olap/page_cache_test.cpp         |  29 ++++++++-
 6 files changed, 240 insertions(+), 59 deletions(-)
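Before the patch itself, here is a minimal, self-contained sketch of the two-touch (LRU-K, K=2) admission rule described above. It is illustrative only: `LruKSketch` and its members are hypothetical names rather than the Doris classes changed below, and it ignores sharding, entry charges, priorities, and locking; unlike the real patch it also does not bound the visits set by the cache capacity.

```cpp
// Illustrative only: a tiny LRU cache with a two-touch (LRU-K, K=2) admission rule.
#include <cstddef>
#include <iostream>
#include <list>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>

class LruKSketch {
public:
    explicit LruKSketch(std::size_t capacity) : _capacity(capacity) {}

    // Returns true if the key ends up cached. While the cache is full, a brand-new key is
    // only recorded in `_visits` on its first insert and admitted on its second insert.
    bool insert(const std::string& key, int value) {
        if (auto it = _index.find(key); it != _index.end()) {
            it->second->second = value;                  // already cached: refresh value and recency
            _lru.splice(_lru.begin(), _lru, it->second);
            return true;
        }
        if (_lru.size() >= _capacity) {                  // cache is full
            if (_visits.erase(key) == 0) {               // first touch since the cache filled up
                _visits.insert(key);
                return false;                            // do not evict hot entries for a one-off key
            }
            _index.erase(_lru.back().first);             // second touch: evict the LRU entry
            _lru.pop_back();
        }
        _lru.emplace_front(key, value);
        _index[key] = _lru.begin();
        return true;
    }

    bool lookup(const std::string& key, int* value) {
        auto it = _index.find(key);
        if (it == _index.end()) {
            return false;
        }
        *value = it->second->second;
        _lru.splice(_lru.begin(), _lru, it->second);     // move to front on a hit
        return true;
    }

private:
    std::size_t _capacity;
    std::list<std::pair<std::string, int>> _lru;         // front = most recently used
    std::unordered_map<std::string, std::list<std::pair<std::string, int>>::iterator> _index;
    std::unordered_set<std::string> _visits;             // keys seen once while the cache was full
};

int main() {
    LruKSketch cache(2);
    int v = 0;
    cache.insert("hot1", 1);
    cache.insert("hot2", 2);                       // cache is now full
    cache.insert("scan", 3);                       // first touch: recorded in visits, not admitted
    std::cout << cache.lookup("hot1", &v) << "\n"; // 1: hot entry survived the one-off key
    cache.insert("scan", 3);                       // second touch: admitted, evicts the LRU entry
    std::cout << cache.lookup("scan", &v) << "\n"; // 1
    return 0;
}
```

In the actual change below, the same idea is keyed by the entry hash and implemented in `LRUCache::_lru_k_insert_visits_list`, where the visits list is itself LRU-ordered and bounded by the cache capacity.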
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index 9895a013894..bd92142c312 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -167,7 +167,7 @@ uint32_t HandleTable::element_count() const {
     return _elems;
 }
 
-LRUCache::LRUCache(LRUCacheType type) : _type(type) {
+LRUCache::LRUCache(LRUCacheType type, bool is_lru_k) : _type(type), _is_lru_k(is_lru_k) {
     // Make empty circular linked list
     _lru_normal.next = &_lru_normal;
     _lru_normal.prev = &_lru_normal;
@@ -305,6 +305,17 @@ Cache::Handle* LRUCache::lookup(const CacheKey& key, uint32_t hash) {
     } else {
         ++_miss_count;
     }
+
+    // If key not exist in cache, and is lru k cache, and key in visits list,
+    // then move the key to beginning of the visits list.
+    // key in visits list indicates that the key has been inserted once after the cache is full.
+    if (e == nullptr && _is_lru_k) {
+        auto it = _visits_lru_cache_map.find(hash);
+        if (it != _visits_lru_cache_map.end()) {
+            _visits_lru_cache_list.splice(_visits_lru_cache_list.begin(), _visits_lru_cache_list,
+                                          it->second);
+        }
+    }
     return reinterpret_cast<Cache::Handle*>(e);
 }
 
@@ -316,10 +327,10 @@ void LRUCache::release(Cache::Handle* handle) {
     bool last_ref = false;
     {
         std::lock_guard l(_mutex);
+        // if last_ref is true, key may have been evict from the cache,
+        // or if it is lru k, first insert of key may have failed.
         last_ref = _unref(e);
-        if (last_ref) {
-            _usage -= e->total_size;
-        } else if (e->in_cache && e->refs == 1) {
+        if (e->in_cache && e->refs == 1) {
             // only exists in cache
             if (_usage > _capacity) {
                 // take this opportunity and remove the item
                 bool removed = _table.remove(e->key(), e->hash);
                 DCHECK(removed);
                 e->in_cache = false;
                 _unref(e);
+                // `entry->in_cache = false` and `_usage -= entry->total_size;` and `_unref(entry)` should appear together.
+                // see the comment for old entry in `LRUCache::insert`.
                 _usage -= e->total_size;
                 last_ref = true;
             } else {
@@ -401,6 +414,8 @@ void LRUCache::_evict_one_entry(LRUHandle* e) {
     DCHECK(removed);
     e->in_cache = false;
     _unref(e);
+    // `entry->in_cache = false` and `_usage -= entry->total_size;` and `_unref(entry)` should appear together.
+    // see the comment for old entry in `LRUCache::insert`.
     _usage -= e->total_size;
 }
 
@@ -408,6 +423,42 @@ bool LRUCache::_check_element_count_limit() {
     return _element_count_capacity != 0 && _table.element_count() >= _element_count_capacity;
 }
 
+// After cache is full,
+// 1.Return false. If key has been inserted into the visits list before,
+// key is allowed to be inserted into cache this time (this will trigger cache evict),
+// and key is removed from the visits list.
+// 2. Return true. If key not in visits list, insert it into visits list.
+bool LRUCache::_lru_k_insert_visits_list(size_t total_size, visits_lru_cache_key visits_key) {
+    if (_usage + total_size > _capacity ||
+        _check_element_count_limit()) { // this line no lock required
+        auto it = _visits_lru_cache_map.find(visits_key);
+        if (it != _visits_lru_cache_map.end()) {
+            _visits_lru_cache_usage -= it->second->second;
+            _visits_lru_cache_list.erase(it->second);
+            _visits_lru_cache_map.erase(it);
+        } else {
+            // _visits_lru_cache_list capacity is same as the cache itself.
+            // If _visits_lru_cache_list is full, some keys will also be evict.
+            while (_visits_lru_cache_usage + total_size > _capacity &&
+                   _visits_lru_cache_usage != 0) {
+                DCHECK(!_visits_lru_cache_map.empty());
+                _visits_lru_cache_usage -= _visits_lru_cache_list.back().second;
+                _visits_lru_cache_map.erase(_visits_lru_cache_list.back().first);
+                _visits_lru_cache_list.pop_back();
+            }
+            // 1. If true, insert key at the beginning of _visits_lru_cache_list.
+            // 2. If false, it means total_size > cache _capacity, preventing this insert.
+            if (_visits_lru_cache_usage + total_size <= _capacity) {
+                _visits_lru_cache_list.emplace_front(visits_key, total_size);
+                _visits_lru_cache_map[visits_key] = _visits_lru_cache_list.begin();
+                _visits_lru_cache_usage += total_size;
+            }
+            return true;
+        }
+    }
+    return false;
+}
+
 Cache::Handle* LRUCache::insert(const CacheKey& key, uint32_t hash, void* value, size_t charge,
                                 CachePriority priority) {
     size_t handle_size = sizeof(LRUHandle) - 1 + key.size();
@@ -419,17 +470,22 @@ Cache::Handle* LRUCache::insert(const CacheKey& key, uint32_t hash, void* value,
     // because charge at this time is no longer the memory size, but an weight.
     e->total_size = (_type == LRUCacheType::SIZE ? handle_size + charge : charge);
     e->hash = hash;
-    e->refs = 2; // one for the returned handle, one for LRUCache.
+    e->refs = 1; // only one for the returned handle.
     e->next = e->prev = nullptr;
-    e->in_cache = true;
+    e->in_cache = false;
     e->priority = priority;
     e->type = _type;
     memcpy(e->key_data, key.data(), key.size());
     e->last_visit_time = UnixMillis();
+
     LRUHandle* to_remove_head = nullptr;
     {
         std::lock_guard l(_mutex);
 
+        if (_is_lru_k && _lru_k_insert_visits_list(e->total_size, hash)) {
+            return reinterpret_cast<Cache::Handle*>(e);
+        }
+
         // Free the space following strict LRU policy until enough space
         // is freed or the lru list is empty
         if (_cache_value_check_timestamp) {
@@ -441,13 +497,22 @@ Cache::Handle* LRUCache::insert(const CacheKey& key, uint32_t hash, void* value,
         // insert into the cache
         // note that the cache might get larger than its capacity if not enough
         // space was freed
-        auto old = _table.insert(e);
+        auto* old = _table.insert(e);
+        e->in_cache = true;
         _usage += e->total_size;
+        e->refs++; // one for the returned handle, one for LRUCache.
         if (old != nullptr) {
             _stampede_count++;
             old->in_cache = false;
+            // `entry->in_cache = false` and `_usage -= entry->total_size;` and `_unref(entry)` should appear together.
+            // Whether the reference of the old entry is 0, the cache usage is subtracted here,
+            // because the old entry has been removed from the cache and should not be counted in the cache capacity,
+            // but the memory of the old entry is still tracked by the cache memory_tracker.
+            // After all the old handles are released, the old entry will be freed and the memory of the old entry
+            // will be released from the cache memory_tracker.
+            _usage -= old->total_size;
+            // if false, old entry is being used externally, just ref-- and sub _usage,
             if (_unref(old)) {
-                _usage -= old->total_size;
                 // old is on LRU because it's in cache and its reference count
                 // was just 1 (Unref returned 0)
                 _lru_remove(old);
@@ -476,14 +541,15 @@ void LRUCache::erase(const CacheKey& key, uint32_t hash) {
         e = _table.remove(key, hash);
         if (e != nullptr) {
             last_ref = _unref(e);
-            if (last_ref) {
-                _usage -= e->total_size;
-                if (e->in_cache) {
-                    // locate in free list
-                    _lru_remove(e);
-                }
+            // if last_ref is false or in_cache is false, e must not be in lru
+            if (last_ref && e->in_cache) {
+                // locate in free list
+                _lru_remove(e);
             }
             e->in_cache = false;
+            // `entry->in_cache = false` and `_usage -= entry->total_size;` and `_unref(entry)` should appear together.
+            // see the comment for old entry in `LRUCache::insert`.
+            _usage -= e->total_size;
         }
     }
     // free handle out of mutex, when last_ref is true, e must not be nullptr
@@ -576,7 +642,8 @@ inline uint32_t ShardedLRUCache::_hash_slice(const CacheKey& s) {
 }
 
 ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type,
-                                 uint32_t num_shards, uint32_t total_element_count_capacity)
+                                 uint32_t num_shards, uint32_t total_element_count_capacity,
+                                 bool is_lru_k)
         : _name(name),
          _num_shard_bits(Bits::FindLSBSetNonZero(num_shards)),
          _num_shards(num_shards),
@@ -592,7 +659,7 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t capacity, LRUCa
             (total_element_count_capacity + (_num_shards - 1)) / _num_shards;
     LRUCache** shards = new (std::nothrow) LRUCache*[_num_shards];
     for (int s = 0; s < _num_shards; s++) {
-        shards[s] = new LRUCache(type);
+        shards[s] = new LRUCache(type, is_lru_k);
         shards[s]->set_capacity(per_shard);
         shards[s]->set_element_count_capacity(per_shard_element_count_capacity);
     }
@@ -623,8 +690,9 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t capacity, LRUCa
                                  uint32_t num_shards,
                                  CacheValueTimeExtractor cache_value_time_extractor,
                                  bool cache_value_check_timestamp,
-                                 uint32_t total_element_count_capacity)
-        : ShardedLRUCache(name, capacity, type, num_shards, total_element_count_capacity) {
+                                 uint32_t total_element_count_capacity, bool is_lru_k)
+        : ShardedLRUCache(name, capacity, type, num_shards, total_element_count_capacity,
+                          is_lru_k) {
     for (int s = 0; s < _num_shards; s++) {
         _shards[s]->set_cache_value_time_extractor(cache_value_time_extractor);
         _shards[s]->set_cache_value_check_timestamp(cache_value_check_timestamp);
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index 4a4b6ddd005..69def481d4c 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -26,30 +26,6 @@
 
 namespace doris {
 
-#define OLAP_CACHE_STRING_TO_BUF(cur, str, r_len)                  \
-    do {                                                           \
-        if (r_len > str.size()) {                                  \
-            memcpy(cur, str.c_str(), str.size());                  \
-            r_len -= str.size();                                   \
-            cur += str.size();                                     \
-        } else {                                                   \
-            LOG(WARNING) << "construct cache key buf not enough."; \
-            return CacheKey(nullptr, 0);                           \
-        }                                                          \
-    } while (0)
-
-#define OLAP_CACHE_NUMERIC_TO_BUF(cur, numeric, r_len)             \
-    do {                                                           \
-        if (r_len > sizeof(numeric)) {                             \
-            memcpy(cur, &numeric, sizeof(numeric));                \
-            r_len -= sizeof(numeric);                              \
-            cur += sizeof(numeric);                                \
-        } else {                                                   \
-            LOG(WARNING) << "construct cache key buf not enough."; \
-            return CacheKey(nullptr, 0);                           \
-        }                                                          \
-    } while (0)
-
 class Cache;
 class LRUCachePolicy;
 struct LRUHandle;
@@ -62,6 +38,7 @@ enum LRUCacheType {
 static constexpr LRUCacheType DEFAULT_LRU_CACHE_TYPE = LRUCacheType::SIZE;
 static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 32;
 static constexpr size_t DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY = 0;
+static constexpr bool DEFAULT_LRU_CACHE_IS_LRU_K = false;
 
 class CacheKey {
 public:
@@ -180,6 +157,10 @@ public:
     //
     // When the inserted entry is no longer needed, the key and
     // value will be passed to "deleter".
+    //
+    // if cache is lru k and cache is full, first insert of key will not succeed.
+    //
+    // Note: if is ShardedLRUCache, cache capacity = ShardedLRUCache_capacity / num_shards.
     virtual Handle* insert(const CacheKey& key, void* value, size_t charge,
                            CachePriority priority = CachePriority::NORMAL) = 0;
 
@@ -326,9 +307,12 @@ using LRUHandleSortedSet = std::set<std::pair<int64_t, LRUHandle*>>;
 
 // A single shard of sharded cache.
 class LRUCache {
 public:
-    LRUCache(LRUCacheType type);
+    LRUCache(LRUCacheType type, bool is_lru_k = DEFAULT_LRU_CACHE_IS_LRU_K);
     ~LRUCache();
 
+    using visits_lru_cache_key = uint32_t;
+    using visits_lru_cache_pair = std::pair<visits_lru_cache_key, size_t>;
+
     // Separate from constructor so caller can easily make an array of LRUCache
     PrunedInfo set_capacity(size_t capacity);
     void set_element_count_capacity(uint32_t element_count_capacity) {
@@ -365,6 +349,7 @@ private:
     void _evict_from_lru_with_time(size_t total_size, LRUHandle** to_remove_head);
     void _evict_one_entry(LRUHandle* e);
     bool _check_element_count_limit();
+    bool _lru_k_insert_visits_list(size_t total_size, visits_lru_cache_key visits_key);
 
 private:
     LRUCacheType _type;
@@ -396,6 +381,12 @@ private:
     LRUHandleSortedSet _sorted_durable_entries_with_timestamp;
 
     uint32_t _element_count_capacity = 0;
+
+    bool _is_lru_k = false; // LRU-K algorithm, K=2
+    std::list<visits_lru_cache_pair> _visits_lru_cache_list;
+    std::unordered_map<visits_lru_cache_key, std::list<visits_lru_cache_pair>::iterator>
+            _visits_lru_cache_map;
+    size_t _visits_lru_cache_usage = 0;
 };
 
 class ShardedLRUCache : public Cache {
@@ -420,11 +411,12 @@ private:
     friend class LRUCachePolicy;
 
     explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type,
-                             uint32_t num_shards, uint32_t element_count_capacity);
+                             uint32_t num_shards, uint32_t element_count_capacity, bool is_lru_k);
     explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type,
                              uint32_t num_shards,
                              CacheValueTimeExtractor cache_value_time_extractor,
-                             bool cache_value_check_timestamp, uint32_t element_count_capacity);
+                             bool cache_value_check_timestamp, uint32_t element_count_capacity,
+                             bool is_lru_k);
 
     void update_cache_metrics() const;
diff --git a/be/src/olap/page_cache.h b/be/src/olap/page_cache.h
index db1a6808345..64212e8c06e 100644
--- a/be/src/olap/page_cache.h
+++ b/be/src/olap/page_cache.h
@@ -97,7 +97,8 @@ public:
     DataPageCache(size_t capacity, uint32_t num_shards)
             : LRUCachePolicy(CachePolicy::CacheType::DATA_PAGE_CACHE, capacity,
                              LRUCacheType::SIZE, config::data_page_cache_stale_sweep_time_sec,
-                             num_shards) {}
+                             num_shards, DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY, true, true) {
+    }
 };
 
 class IndexPageCache : public LRUCachePolicy {
diff --git a/be/src/runtime/memory/lru_cache_policy.h b/be/src/runtime/memory/lru_cache_policy.h
index d4c282dab82..7e73f2dd76b 100644
--- a/be/src/runtime/memory/lru_cache_policy.h
+++ b/be/src/runtime/memory/lru_cache_policy.h
@@ -36,13 +36,13 @@ public:
     LRUCachePolicy(CacheType type, size_t capacity, LRUCacheType lru_cache_type,
                    uint32_t stale_sweep_time_s, uint32_t num_shards = DEFAULT_LRU_CACHE_NUM_SHARDS,
                    uint32_t element_count_capacity = DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY,
-                   bool enable_prune = true)
+                   bool enable_prune = true, bool is_lru_k = DEFAULT_LRU_CACHE_IS_LRU_K)
             : CachePolicy(type, capacity, stale_sweep_time_s, enable_prune),
              _lru_cache_type(lru_cache_type) {
         if (check_capacity(capacity, num_shards)) {
             _cache = std::shared_ptr<ShardedLRUCache>(
                     new ShardedLRUCache(type_string(type), capacity, lru_cache_type, num_shards,
-                                        element_count_capacity));
+                                        element_count_capacity, is_lru_k));
         } else {
             CHECK(ExecEnv::GetInstance()->get_dummy_lru_cache());
             _cache = ExecEnv::GetInstance()->get_dummy_lru_cache();
@@ -54,14 +54,15 @@ public:
                    uint32_t stale_sweep_time_s, uint32_t num_shards,
                    uint32_t element_count_capacity,
                    CacheValueTimeExtractor cache_value_time_extractor,
-                   bool cache_value_check_timestamp, bool enable_prune = true)
+                   bool cache_value_check_timestamp, bool enable_prune = true,
+                   bool is_lru_k = DEFAULT_LRU_CACHE_IS_LRU_K)
             : CachePolicy(type, capacity, stale_sweep_time_s, enable_prune),
              _lru_cache_type(lru_cache_type) {
         if (check_capacity(capacity, num_shards)) {
             _cache = std::shared_ptr<ShardedLRUCache>(
                     new ShardedLRUCache(type_string(type), capacity, lru_cache_type, num_shards,
                                         cache_value_time_extractor, cache_value_check_timestamp,
-                                        element_count_capacity));
+                                        element_count_capacity, is_lru_k));
         } else {
             CHECK(ExecEnv::GetInstance()->get_dummy_lru_cache());
             _cache = ExecEnv::GetInstance()->get_dummy_lru_cache();
diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp
index 1acc38f2b9e..a48575e72ea 100644
--- a/be/test/olap/lru_cache_test.cpp
+++ b/be/test/olap/lru_cache_test.cpp
@@ -24,6 +24,7 @@
 #include <iosfwd>
 #include <vector>
 
+#include "gtest/gtest.h"
 #include "gtest/gtest_pred_impl.h"
 #include "runtime/memory/lru_cache_policy.h"
 #include "runtime/memory/lru_cache_value_base.h"
@@ -105,6 +106,9 @@ public:
     // there is 16 shards in ShardedLRUCache
     // And the LRUHandle size is about 100B. So the cache size should big enough
     // to run the UT.
+    // kCacheSize needs to be an even number. if odd number, the cache will behave correctly,
+    // but the UT Test will fail because check(capacity / 2) will fail.
+    // In fact, Cache will waste an entry space.
     static const int kCacheSize = 1000 * 16;
     std::vector<int> _deleted_keys;
     std::vector<int> _deleted_values;
@@ -325,7 +329,74 @@ TEST_F(CacheTest, Usage) {
 
     CacheKey key7("950");
     insert_LRUCache(cache, key7, 950, CachePriority::DURABLE);
-    ASSERT_EQ(0, cache.get_usage()); // evict 298 698, because 950 + 98 > 1040, so insert failed
+    ASSERT_EQ(
+            0,
+            cache.get_usage()); // evict 298 698, because 950 + 98 > 1040, data was freed when handle release.
+
+    CacheKey key8("900");
+    insert_LRUCache(cache, key8, 900, CachePriority::NORMAL);
+    ASSERT_EQ(998, cache.get_usage()); // 900 + 98 < 1050
+}
+
+TEST_F(CacheTest, UsageLRUK) {
+    LRUCache cache(LRUCacheType::SIZE, true);
+    cache.set_capacity(1050);
+
+    // The lru usage is handle_size + charge.
+    // handle_size = sizeof(handle) - 1 + key size = 96 - 1 + 3 = 98
+    CacheKey key1("100");
+    insert_LRUCache(cache, key1, 100, CachePriority::NORMAL);
+    ASSERT_EQ(198, cache.get_usage()); // 100 + 98
+
+    CacheKey key2("200");
+    insert_LRUCache(cache, key2, 200, CachePriority::DURABLE);
+    ASSERT_EQ(496, cache.get_usage()); // 198 + 298(d), d = DURABLE
+
+    CacheKey key3("300");
+    insert_LRUCache(cache, key3, 300, CachePriority::NORMAL);
+    ASSERT_EQ(894, cache.get_usage()); // 198 + 298(d) + 398
+
+    CacheKey key4("400");
+    insert_LRUCache(cache, key4, 400, CachePriority::NORMAL);
+    // Data cache is full, not insert, visits lru cache not exist key=498(400 + 98) and insert it.
+    ASSERT_EQ(894, cache.get_usage());
+
+    insert_LRUCache(cache, key4, 400, CachePriority::NORMAL);
+    // Data cache 298(d) + 498, evict 198 398. visits lru cache exist key=498
+    // and erase from visits lru cache, insert to Data cache.
+    ASSERT_EQ(796, cache.get_usage());
+
+    CacheKey key5("500");
+    insert_LRUCache(cache, key5, 500, CachePriority::NORMAL);
+    // Data cache is full, not insert, visits lru cache not exist key=598(500 + 98) and insert it.
+    ASSERT_EQ(796, cache.get_usage());
+
+    CacheKey key6("600");
+    insert_LRUCache(cache, key6, 600, CachePriority::NORMAL);
+    // Data cache is full, not insert, visits lru cache not exist key=698(600 + 98) and insert it,
+    // visits lru cache is full, evict key=598 from visits lru cache.
+    ASSERT_EQ(796, cache.get_usage());
+
+    insert_LRUCache(cache, key5, 500, CachePriority::NORMAL);
+    // Data cache is full, not insert, visits lru cache not exist key=598 and insert it.
+    // visits lru cache is full, evict key=698 from visits lru cache.
+    ASSERT_EQ(796, cache.get_usage());
+
+    insert_LRUCache(cache, key5, 500, CachePriority::NORMAL);
+    // Data cache 298(d) + 598, evict 498. visits lru cache exist key=598
+    // and erase from visits lru cache, insert to Data cache.
+    ASSERT_EQ(896, cache.get_usage());
+
+    CacheKey key7("980");
+    insert_LRUCache(cache, key7, 980, CachePriority::DURABLE);
+    // Data cache is full, not insert, visits lru cache not exist key=1078(980 + 98)
+    // but 1078 > capacity(1050), not insert visits lru cache.
+    ASSERT_EQ(896, cache.get_usage());
+
+    insert_LRUCache(cache, key7, 980, CachePriority::DURABLE);
+    // Ssame as above, data cache is full, not insert, visits lru cache not exist key=1078(980 + 98)
+    // but 1078 > capacity(1050), not insert visits lru cache.
+    ASSERT_EQ(896, cache.get_usage());
 }
 
 TEST_F(CacheTest, Prune) {
@@ -661,26 +732,51 @@ TEST_F(CacheTest, SetCapacity) {
              cache()->get_usage()); // Handle not be released, so key cannot be evicted.
 
     for (int i = 0; i < kCacheSize; i++) {
+        // The Key exists in the Cache, remove the old Entry from the Cache, and insert it again.
         Insert(i + kCacheSize, 2000 + i, 1);
-        EXPECT_EQ(-1, Lookup(i + kCacheSize)); // Cache is full, insert failed.
+        if (i < kCacheSize / 2) {
+            // Insert() will replace the entry with the same key in the cache, the replaced entry will
+            // not be freed because there are unreleased handles holding them.
+            // The current cache capacity(kCacheSize/2) is half of the cache usage(kCacheSize),
+            // Insert() method will immediately release the handle of the newly inserted entry,
+            // so the newly inserted entry will be freed, until cache usage is less than or equal to capacity.
+            ASSERT_GE(cache()->get_usage(), cache()->get_capacity());
+            EXPECT_EQ(-1, Lookup(i + kCacheSize));
+        } else if (i == kCacheSize / 2) {
+            // When cache usage is equal to cache capacity, Insert() will replace the old entry
+            // with the same key and will not free the entry after releasing the handle.
+            ASSERT_EQ(cache()->get_usage(), cache()->get_capacity());
+            EXPECT_EQ(2000 + i, Lookup(i + kCacheSize));
+        } else {
+            // When inserting at `i == kCacheSize / 2 + 1`, the cache usage is equal to the cache capacity,
+            // so the entry in the LRU list will be evicted (usage - 1) and then inserted (usage + 1).
+            // because the entry inserted is an existing key, the old entry with the same key is evicted (usage - 1),
+            // so the final cache usage is equal to (capacity - 1).
+            ASSERT_EQ(cache()->get_usage(), cache()->get_capacity() - 1);
+            EXPECT_EQ(2000 + i, Lookup(i + kCacheSize));
+        }
     }
     ASSERT_EQ(kCacheSize / 2, cache()->get_capacity());
-    ASSERT_EQ(kCacheSize, cache()->get_usage());
+    // Here cache usage equals cache capacity - 1, because the entry inserted in the previous step
+    // at `i == kCacheSize / 2 + 1` was evicted, see the reason above.
+    // Entries held by unreleased handles in `handles` will not be counted in cache usage,
+    // but will still be counted in the memory tracker.
+    ASSERT_EQ(kCacheSize / 2 - 1, cache()->get_usage());
 
     cache()->adjust_capacity_weighted(2);
     ASSERT_EQ(kCacheSize * 2, cache()->get_capacity());
-    ASSERT_EQ(kCacheSize, cache()->get_usage());
+    ASSERT_EQ(kCacheSize / 2 - 1, cache()->get_usage());
 
     for (int i = 0; i < kCacheSize; i++) {
         Insert(i, 3000 + i, 1);
         EXPECT_EQ(3000 + i, Lookup(i));
     }
     ASSERT_EQ(kCacheSize * 2, cache()->get_capacity());
-    ASSERT_EQ(kCacheSize * 2, cache()->get_usage());
+    ASSERT_EQ(kCacheSize * 1.5 - 1, cache()->get_usage());
 
     cache()->adjust_capacity_weighted(0);
     ASSERT_EQ(0, cache()->get_capacity());
-    ASSERT_EQ(kCacheSize, cache()->get_usage());
+    ASSERT_EQ(0, cache()->get_usage());
 
     for (auto it : handles) {
         cache()->release(it);
diff --git a/be/test/olap/page_cache_test.cpp b/be/test/olap/page_cache_test.cpp
index 1feb6152add..a6b9300c105 100644
--- a/be/test/olap/page_cache_test.cpp
+++ b/be/test/olap/page_cache_test.cpp
@@ -71,7 +71,8 @@ TEST_F(StoragePageCacheTest, data_page_only) {
         EXPECT_TRUE(found);
     }
 
-    // put too many page to eliminate first page
+    // Page Cache is LRU-K, K=2.
+    // Put too many page, after cache is full, no key is inserted twice and no evict occurs.
     for (int i = 0; i < 10 * kNumShards; ++i) {
         StoragePageCache::CacheKey key("bcd", 0, i);
         PageCacheHandle handle;
@@ -95,6 +96,27 @@ TEST_F(StoragePageCacheTest, data_page_only) {
         EXPECT_FALSE(found);
     }
 
+    // After cache is full, no key is inserted twice and no evict occurs.
+    {
+        PageCacheHandle handle;
+        auto found = cache.lookup(key, &handle, page_type);
+        EXPECT_TRUE(found);
+    }
+
+    // put too many page twice to eliminate first page
+    for (int i = 0; i < 10 * kNumShards; ++i) {
+        StoragePageCache::CacheKey key("bcde", 0, i);
+        PageCacheHandle handle;
+        auto* data = new DataPage(1024, true, page_type);
+        cache.insert(key, data, &handle, page_type, false);
+        auto found = cache.lookup(key, &handle, page_type); // after handle destruct, free data.
+        EXPECT_FALSE(found);
+        data = new DataPage(1024, true, page_type);
+        cache.insert(key, data, &handle, page_type, false);
+        found = cache.lookup(key, &handle, page_type);
+        EXPECT_TRUE(found);
+    }
+
     // cache miss for eliminated key
     {
         PageCacheHandle handle;
@@ -253,12 +275,13 @@ TEST_F(StoragePageCacheTest, mixed_pages) {
         EXPECT_FALSE(found_index);
     }
 
-    // cache miss for eliminated key
     {
         PageCacheHandle data_handle, index_handle;
         auto found_data = cache.lookup(data_key, &data_handle, page_type_data);
         auto found_index = cache.lookup(index_key, &index_handle, page_type_index);
-        EXPECT_FALSE(found_data);
+        // after cache is full, no key is inserted twice and no evict occurs
+        EXPECT_TRUE(found_data);
+        // cache miss for eliminated key
         EXPECT_FALSE(found_index);
     }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org