This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new b0db3044041 [fix](filecache) ttl cache runs wild after enable LRU 
eviction (#39814)
b0db3044041 is described below

commit b0db304404121c8f634f60b801ff49eb480206dc
Author: zhengyu <freeman.zhang1...@gmail.com>
AuthorDate: Fri Aug 23 23:49:39 2024 +0800

    [fix](filecache) ttl cache runs wild after enable LRU eviction (#39814)
    
    Scenarios where issues occur:
    - TTL with LRU eviction enabled
    (config::enable_ttl_cache_evict_using_lru = true)
    - TTL's try_reserve_for_ttl_without_lru encounters a limitation set by
    config::max_ttl_cache_ratio = 90%
    - LRU begins the eviction process. However, the amount of eviction is
    determined solely by the condition !is_overflow(), which does not
    consider the 90% limitation. This leads to a premature return of a
    successful reserve, resulting in the overall TTL exceeding the 90%
    limit.
    
    Modification:
    For the TTL and LRU eviction quantities, in addition to checking for
    !is_overflow, the condition must also satisfy the restriction set by
    config::max_ttl_cache_ratio.
    
    Signed-off-by: freemandealer <freeman.zhang1...@gmail.com>
---
 be/src/common/config.cpp                   |  1 +
 be/src/io/cache/block_file_cache.cpp       | 33 +++++++++----
 be/src/io/cache/block_file_cache.h         |  5 +-
 be/test/io/cache/block_file_cache_test.cpp | 76 +++++++++++++++++++++++++++++-
 4 files changed, 101 insertions(+), 14 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 78805a58ac6..9905caf0080 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -998,6 +998,7 @@ DEFINE_mBool(variant_throw_exeception_on_invalid_json, 
"false");
 DEFINE_Bool(enable_file_cache, "false");
 // format: 
[{"path":"/path/to/file_cache","total_size":21474836480,"query_limit":10737418240}]
 // format: 
[{"path":"/path/to/file_cache","total_size":21474836480,"query_limit":10737418240},{"path":"/path/to/file_cache2","total_size":21474836480,"query_limit":10737418240}]
+// format: {"path": "/path/to/file_cache", "total_size":53687091200, 
"normal_percent":85, "disposable_percent":10, "index_percent":5}
 DEFINE_String(file_cache_path, "");
 DEFINE_Int64(file_cache_each_block_size, "1048576"); // 1MB
 
diff --git a/be/src/io/cache/block_file_cache.cpp 
b/be/src/io/cache/block_file_cache.cpp
index d7dfc743a87..c253130cf3b 100644
--- a/be/src/io/cache/block_file_cache.cpp
+++ b/be/src/io/cache/block_file_cache.cpp
@@ -818,9 +818,9 @@ void BlockFileCache::remove_file_blocks_and_clean_time_maps(
 void BlockFileCache::find_evict_candidates(LRUQueue& queue, size_t size, 
size_t cur_cache_size,
                                            size_t& removed_size,
                                            std::vector<FileBlockCell*>& 
to_evict,
-                                           std::lock_guard<std::mutex>& 
cache_lock) {
+                                           std::lock_guard<std::mutex>& 
cache_lock, bool is_ttl) {
     for (const auto& [entry_key, entry_offset, entry_size] : queue) {
-        if (!is_overflow(removed_size, size, cur_cache_size)) {
+        if (!is_overflow(removed_size, size, cur_cache_size, is_ttl)) {
             break;
         }
         auto* cell = get_cell(entry_key, entry_offset, cache_lock);
@@ -860,7 +860,8 @@ bool BlockFileCache::try_reserve_for_ttl_without_lru(size_t 
size,
     }
     std::vector<FileBlockCell*> to_evict;
     auto collect_eliminate_fragments = [&](LRUQueue& queue) {
-        find_evict_candidates(queue, size, cur_cache_size, removed_size, 
to_evict, cache_lock);
+        find_evict_candidates(queue, size, cur_cache_size, removed_size, 
to_evict, cache_lock,
+                              false);
     };
     if (disposable_queue_size != 0) {
         collect_eliminate_fragments(get_queue(FileCacheType::DISPOSABLE));
@@ -887,7 +888,8 @@ bool BlockFileCache::try_reserve_for_ttl(size_t size, 
std::lock_guard<std::mutex
         size_t cur_cache_size = _cur_cache_size;
 
         std::vector<FileBlockCell*> to_evict;
-        find_evict_candidates(queue, size, cur_cache_size, removed_size, 
to_evict, cache_lock);
+        find_evict_candidates(queue, size, cur_cache_size, removed_size, 
to_evict, cache_lock,
+                              true);
         remove_file_blocks_and_clean_time_maps(to_evict, cache_lock);
 
         return !is_overflow(removed_size, size, cur_cache_size);
@@ -1151,10 +1153,19 @@ bool 
BlockFileCache::try_reserve_from_other_queue_by_hot_interval(
     return !is_overflow(removed_size, size, cur_cache_size);
 }
 
-bool BlockFileCache::is_overflow(size_t removed_size, size_t need_size,
-                                 size_t cur_cache_size) const {
-    return _disk_resource_limit_mode ? removed_size < need_size
-                                     : cur_cache_size + need_size - 
removed_size > _capacity;
+bool BlockFileCache::is_overflow(size_t removed_size, size_t need_size, size_t 
cur_cache_size,
+                                 bool is_ttl) const {
+    bool ret = false;
+    if (_disk_resource_limit_mode) {
+        ret = (removed_size < need_size);
+    } else {
+        ret = (cur_cache_size + need_size - removed_size > _capacity);
+    }
+    if (is_ttl) {
+        size_t ttl_threshold = config::max_ttl_cache_ratio * _capacity / 100;
+        return (ret || ((cur_cache_size + need_size - removed_size) > 
ttl_threshold));
+    }
+    return ret;
 }
 
 bool BlockFileCache::try_reserve_from_other_queue_by_size(
@@ -1165,7 +1176,8 @@ bool BlockFileCache::try_reserve_from_other_queue_by_size(
     std::vector<FileBlockCell*> to_evict;
     for (FileCacheType cache_type : other_cache_types) {
         auto& queue = get_queue(cache_type);
-        find_evict_candidates(queue, size, cur_cache_size, removed_size, 
to_evict, cache_lock);
+        find_evict_candidates(queue, size, cur_cache_size, removed_size, 
to_evict, cache_lock,
+                              false);
     }
     remove_file_blocks(to_evict, cache_lock);
     return !is_overflow(removed_size, size, cur_cache_size);
@@ -1207,7 +1219,8 @@ bool BlockFileCache::try_reserve_for_lru(const 
UInt128Wrapper& hash,
         size_t cur_cache_size = _cur_cache_size;
 
         std::vector<FileBlockCell*> to_evict;
-        find_evict_candidates(queue, size, cur_cache_size, removed_size, 
to_evict, cache_lock);
+        find_evict_candidates(queue, size, cur_cache_size, removed_size, 
to_evict, cache_lock,
+                              false);
         remove_file_blocks(to_evict, cache_lock);
 
         if (is_overflow(removed_size, size, cur_cache_size)) {
diff --git a/be/src/io/cache/block_file_cache.h 
b/be/src/io/cache/block_file_cache.h
index cafb57f9a1e..cd44e77eaa3 100644
--- a/be/src/io/cache/block_file_cache.h
+++ b/be/src/io/cache/block_file_cache.h
@@ -390,7 +390,8 @@ private:
     bool try_reserve_from_other_queue_by_size(std::vector<FileCacheType> 
other_cache_types,
                                               size_t size, 
std::lock_guard<std::mutex>& cache_lock);
 
-    bool is_overflow(size_t removed_size, size_t need_size, size_t 
cur_cache_size) const;
+    bool is_overflow(size_t removed_size, size_t need_size, size_t 
cur_cache_size,
+                     bool is_ttl = false) const;
 
     void remove_file_blocks(std::vector<FileBlockCell*>&, 
std::lock_guard<std::mutex>&);
 
@@ -399,7 +400,7 @@ private:
 
     void find_evict_candidates(LRUQueue& queue, size_t size, size_t 
cur_cache_size,
                                size_t& removed_size, 
std::vector<FileBlockCell*>& to_evict,
-                               std::lock_guard<std::mutex>& cache_lock);
+                               std::lock_guard<std::mutex>& cache_lock, bool 
is_ttl);
     // info
     std::string _cache_base_path;
     size_t _capacity = 0;
diff --git a/be/test/io/cache/block_file_cache_test.cpp 
b/be/test/io/cache/block_file_cache_test.cpp
index 180a0c8f209..77cf48ac8e7 100644
--- a/be/test/io/cache/block_file_cache_test.cpp
+++ b/be/test/io/cache/block_file_cache_test.cpp
@@ -4236,11 +4236,83 @@ TEST_F(BlockFileCacheTest, 
ttl_reserve_with_evict_using_lru) {
                      io::FileBlock::State::DOWNLOADED);
     }
 
-    EXPECT_EQ(cache._cur_cache_size, 60);
-    EXPECT_EQ(cache._ttl_queue.cache_size, 60);
+    EXPECT_EQ(cache._cur_cache_size, 50);
+    EXPECT_EQ(cache._ttl_queue.cache_size, 50);
+    if (fs::exists(cache_base_path)) {
+        fs::remove_all(cache_base_path);
+    }
+}
+
+TEST_F(BlockFileCacheTest, 
ttl_reserve_with_evict_using_lru_meet_max_ttl_cache_ratio_limit) {
+    config::file_cache_ttl_valid_check_interval_second = 4;
+    config::enable_ttl_cache_evict_using_lru = true;
+    int old = config::max_ttl_cache_ratio;
+    config::max_ttl_cache_ratio = 50;
+
+    if (fs::exists(cache_base_path)) {
+        fs::remove_all(cache_base_path);
+    }
+    fs::create_directories(cache_base_path);
+    TUniqueId query_id;
+    query_id.hi = 1;
+    query_id.lo = 1;
+    io::FileCacheSettings settings;
+    settings.query_queue_size = 30;
+    settings.query_queue_elements = 5;
+    settings.index_queue_size = 30;
+    settings.index_queue_elements = 5;
+    settings.disposable_queue_size = 0;
+    settings.disposable_queue_elements = 0;
+    settings.capacity = 60;
+    settings.max_file_block_size = 30;
+    settings.max_query_cache_size = 30;
+    io::CacheContext context;
+    context.query_id = query_id;
+    auto key = io::BlockFileCache::hash("key1");
+    io::BlockFileCache cache(cache_base_path, settings);
+    context.cache_type = io::FileCacheType::TTL;
+    context.expiration_time = UnixSeconds() + 3600;
+
+    ASSERT_TRUE(cache.initialize());
+    for (int i = 0; i < 100; i++) {
+        if (cache.get_lazy_open_success()) {
+            break;
+        };
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    }
+    for (int64_t offset = 0; offset < (60 * config::max_ttl_cache_ratio / 
100); offset += 5) {
+        auto holder = cache.get_or_set(key, offset, 5, context);
+        auto segments = fromHolder(holder);
+        ASSERT_EQ(segments.size(), 1);
+        assert_range(1, segments[0], io::FileBlock::Range(offset, offset + 4),
+                     io::FileBlock::State::EMPTY);
+        ASSERT_TRUE(segments[0]->get_or_set_downloader() == 
io::FileBlock::get_caller_id());
+        download(segments[0]);
+        assert_range(1, segments[0], io::FileBlock::Range(offset, offset + 4),
+                     io::FileBlock::State::DOWNLOADED);
+    }
+    EXPECT_EQ(cache._cur_cache_size, 30);
+    EXPECT_EQ(cache._ttl_queue.cache_size, 30);
+    context.cache_type = io::FileCacheType::TTL;
+    context.expiration_time = UnixSeconds() + 3600;
+    for (int64_t offset = 60; offset < 70; offset += 5) {
+        auto holder = cache.get_or_set(key, offset, 5, context);
+        auto segments = fromHolder(holder);
+        ASSERT_EQ(segments.size(), 1);
+        assert_range(1, segments[0], io::FileBlock::Range(offset, offset + 4),
+                     io::FileBlock::State::EMPTY);
+        ASSERT_TRUE(segments[0]->get_or_set_downloader() == 
io::FileBlock::get_caller_id());
+        download(segments[0]);
+        assert_range(1, segments[0], io::FileBlock::Range(offset, offset + 4),
+                     io::FileBlock::State::DOWNLOADED);
+    }
+
+    EXPECT_EQ(cache._cur_cache_size, 30);
+    EXPECT_EQ(cache._ttl_queue.cache_size, 30);
     if (fs::exists(cache_base_path)) {
         fs::remove_all(cache_base_path);
     }
+    config::max_ttl_cache_ratio = old;
 }
 
 TEST_F(BlockFileCacheTest, reset_capacity) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to