This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0.5 in repository https://gitbox.apache.org/repos/asf/doris.git
commit de3cb382fbe2dac4467a45c210118a8fc2e0af42 Author: zhengyu <zhangzhen...@selectdb.com> AuthorDate: Fri Apr 25 17:53:51 2025 +0800 [fix](cloud) batch process ttl cache block gc to limit lock held time once in a time (pick#50387) (#50402) pick#50387 Garbage-collecting too many TTL cache blocks at once causes a burst in cache lock latency, which in turn affects query latency. Process them in batches so that the lock is released between batches. --- be/src/common/config.cpp | 3 ++ be/src/common/config.h | 4 ++ be/src/io/cache/block_file_cache.cpp | 18 ++++++--- be/src/io/cache/block_file_cache.h | 1 + be/test/io/cache/block_file_cache_test.cpp | 64 ++++++++++++++++++++++++++++++ 5 files changed, 85 insertions(+), 5 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index f36bd991e09..452e93a3d49 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1097,6 +1097,9 @@ DEFINE_mInt64(cache_lock_held_long_tail_threshold_us, "30000000"); DEFINE_mBool(enable_file_cache_keep_base_compaction_output, "false"); DEFINE_mInt64(file_cache_remove_block_qps_limit, "1000"); DEFINE_mInt64(file_cache_background_gc_interval_ms, "100"); +DEFINE_mInt64(file_cache_background_monitor_interval_ms, "5000"); +DEFINE_mInt64(file_cache_background_ttl_gc_interval_ms, "3000"); +DEFINE_mInt64(file_cache_background_ttl_gc_batch, "1000"); DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "1800"); DEFINE_mInt32(inverted_index_cache_stale_sweep_time_sec, "600"); diff --git a/be/src/common/config.h b/be/src/common/config.h index f927c0ae704..3294dd5e7df 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1138,6 +1138,10 @@ DECLARE_mInt64(cache_lock_held_long_tail_threshold_us); DECLARE_mBool(enable_file_cache_keep_base_compaction_output); DECLARE_mInt64(file_cache_remove_block_qps_limit); DECLARE_mInt64(file_cache_background_gc_interval_ms); +DECLARE_mInt64(file_cache_background_monitor_interval_ms); +DECLARE_mInt64(file_cache_background_ttl_gc_interval_ms); 
+DECLARE_mInt64(file_cache_background_ttl_gc_batch); + // inverted index searcher cache // cache entry stay time after lookup DECLARE_mInt32(index_cache_entry_stay_time_after_lookup_s); diff --git a/be/src/io/cache/block_file_cache.cpp b/be/src/io/cache/block_file_cache.cpp index 52e36e42f9f..d38e9670f2f 100644 --- a/be/src/io/cache/block_file_cache.cpp +++ b/be/src/io/cache/block_file_cache.cpp @@ -219,6 +219,8 @@ BlockFileCache::BlockFileCache(const std::string& cache_base_path, _recycle_keys_length_recorder = std::make_shared<bvar::LatencyRecorder>( _cache_base_path.c_str(), "file_cache_recycle_keys_length"); + _ttl_gc_latency_us = std::make_shared<bvar::LatencyRecorder>(_cache_base_path.c_str(), + "file_cache_ttl_gc_latency_us"); _disposable_queue = LRUQueue(cache_settings.disposable_queue_size, cache_settings.disposable_queue_elements, 60 * 60); @@ -1829,28 +1831,34 @@ void BlockFileCache::run_background_monitor() { } } -void BlockFileCache::run_background_ttl_gc() { // TODO(zhengyu): fix! 
- int64_t interval_time_seconds = 20; +void BlockFileCache::run_background_ttl_gc() { while (!_close) { - TEST_SYNC_POINT_CALLBACK("BlockFileCache::set_sleep_time", &interval_time_seconds); + int64_t interval_ms = config::file_cache_background_ttl_gc_interval_ms; + int64_t batch_size = config::file_cache_background_ttl_gc_batch; + TEST_SYNC_POINT_CALLBACK("BlockFileCache::set_sleep_time", &interval_ms); { std::unique_lock close_lock(_close_mtx); - _close_cv.wait_for(close_lock, std::chrono::seconds(interval_time_seconds)); + _close_cv.wait_for(close_lock, std::chrono::milliseconds(interval_ms)); if (_close) { break; } } + int64_t duration_ns = 0; { int64_t cur_time = UnixSeconds(); + int64_t count = 0; SCOPED_CACHE_LOCK(_mutex, this); + SCOPED_RAW_TIMER(&duration_ns); while (!_time_to_key.empty()) { auto begin = _time_to_key.begin(); - if (cur_time < begin->first) { + if (cur_time < begin->first || count > batch_size) { break; } remove_if_ttl_file_blocks(begin->second, false, cache_lock, false); + ++count; } } + *_ttl_gc_latency_us << (duration_ns / 1000); } } diff --git a/be/src/io/cache/block_file_cache.h b/be/src/io/cache/block_file_cache.h index 5634621eb41..a64d339d081 100644 --- a/be/src/io/cache/block_file_cache.h +++ b/be/src/io/cache/block_file_cache.h @@ -574,6 +574,7 @@ private: std::shared_ptr<bvar::LatencyRecorder> _storage_async_remove_latency_us; std::shared_ptr<bvar::LatencyRecorder> _evict_in_advance_latency_us; std::shared_ptr<bvar::LatencyRecorder> _recycle_keys_length_recorder; + std::shared_ptr<bvar::LatencyRecorder> _ttl_gc_latency_us; }; } // namespace doris::io diff --git a/be/test/io/cache/block_file_cache_test.cpp b/be/test/io/cache/block_file_cache_test.cpp index 6aad66de5cc..2c9bfaf2093 100644 --- a/be/test/io/cache/block_file_cache_test.cpp +++ b/be/test/io/cache/block_file_cache_test.cpp @@ -2898,6 +2898,70 @@ TEST_F(BlockFileCacheTest, remove_directly_when_normal_change_to_ttl) { } } +TEST_F(BlockFileCacheTest, ttl_gc) { + if 
(fs::exists(cache_base_path)) { + fs::remove_all(cache_base_path); + } + fs::create_directories(cache_base_path); + auto sp = SyncPoint::get_instance(); + SyncPoint::CallbackGuard guard1; + sp->enable_processing(); + TUniqueId query_id; + query_id.hi = 1; + query_id.lo = 1; + io::FileCacheSettings settings; + settings.query_queue_size = 50; + settings.query_queue_elements = 5; + settings.ttl_queue_size = 500; + settings.ttl_queue_elements = 500; + settings.capacity = 100; + settings.max_file_block_size = 30; + settings.max_query_cache_size = 30; + + config::file_cache_background_ttl_gc_batch = 6; + config::file_cache_background_ttl_gc_interval_ms = + 3000; // make it big enough to disable auto ttl_gc + + io::BlockFileCache cache(cache_base_path, settings); + ASSERT_TRUE(cache.initialize()); + for (int i = 0; i < 100; i++) { + if (cache.get_async_open_success()) { + break; + }; + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + + io::CacheContext context; + ReadStatistics rstats; + context.stats = &rstats; + context.cache_type = io::FileCacheType::TTL; + context.query_id = query_id; + int64_t cur_time = UnixSeconds(); + context.expiration_time = cur_time + 2; + + for (int64_t i = 0; i < 12; ++i) { + auto key = io::BlockFileCache::hash(fmt::format("key{}", i)); + auto holder = cache.get_or_set(key, 0, 5, context); + auto blocks = fromHolder(holder); + ASSERT_EQ(blocks.size(), 1); + assert_range(1, blocks[0], io::FileBlock::Range(0, 4), io::FileBlock::State::EMPTY); + ASSERT_TRUE(blocks[0]->get_or_set_downloader() == io::FileBlock::get_caller_id()); + download(blocks[0]); + assert_range(1, blocks[0], io::FileBlock::Range(0, 4), io::FileBlock::State::DOWNLOADED); + } + ASSERT_EQ(cache._time_to_key.size(), 12); + + std::this_thread::sleep_for(std::chrono::milliseconds(3000)); + ASSERT_GT(cache._time_to_key.size(), 0); + + std::this_thread::sleep_for(std::chrono::milliseconds(3000)); + ASSERT_EQ(cache._time_to_key.size(), 0); + + if 
(fs::exists(cache_base_path)) { + fs::remove_all(cache_base_path); + } +} + TEST_F(BlockFileCacheTest, recyle_cache_async) { if (fs::exists(cache_base_path)) { fs::remove_all(cache_base_path); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org