This is an automated email from the ASF dual-hosted git repository.
hellostephen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c7089faad6c [fix](filecache) BlockFileCache use-after-free caused by
destructor order (#61192)
c7089faad6c is described below
commit c7089faad6c5a1584cfc31d5c582344211ddb21e
Author: zhengyu <[email protected]>
AuthorDate: Fri Mar 20 16:11:00 2026 +0800
[fix](filecache) BlockFileCache use-after-free caused by destructor order
(#61192)
- In BlockFileCache::initialize_unlocked(): _storage->init(this) is
executed first, then _ttl_mgr is created via raw pointer from
fs_storage->get_meta_store()
- Member variables order: _ttl_mgr (front), _storage (back)
- C++ reverse destructor order leads to _storage being destructed before
_ttl_mgr
- Risk: *_ttl_mgr threads may access already-destructed
_storage/_meta_store
---
be/src/io/cache/block_file_cache.cpp | 138 +++++++++++++++------------
be/src/io/cache/block_file_cache.h | 6 ++
be/src/io/cache/block_file_cache_ttl_mgr.cpp | 31 ++++--
be/src/io/cache/block_file_cache_ttl_mgr.h | 5 +-
4 files changed, 111 insertions(+), 69 deletions(-)
diff --git a/be/src/io/cache/block_file_cache.cpp
b/be/src/io/cache/block_file_cache.cpp
index a1109443aeb..5d66700636e 100644
--- a/be/src/io/cache/block_file_cache.cpp
+++ b/be/src/io/cache/block_file_cache.cpp
@@ -2198,76 +2198,94 @@ void BlockFileCache::clear_need_update_lru_blocks() {
*_need_update_lru_blocks_length_recorder << _need_update_lru_blocks.size();
}
+void BlockFileCache::pause_ttl_manager() {
+ if (_ttl_mgr) {
+ _ttl_mgr->stop();
+ }
+}
+
+void BlockFileCache::resume_ttl_manager() {
+ if (_ttl_mgr) {
+ _ttl_mgr->resume();
+ }
+}
+
std::string BlockFileCache::clear_file_cache_directly() {
+ pause_ttl_manager();
_lru_dumper->remove_lru_dump_files();
using namespace std::chrono;
std::stringstream ss;
auto start = steady_clock::now();
- SCOPED_CACHE_LOCK(_mutex, this);
- LOG_INFO("start clear_file_cache_directly").tag("path", _cache_base_path);
-
- std::string clear_msg;
- auto s = _storage->clear(clear_msg);
- if (!s.ok()) {
- return clear_msg;
- }
-
- int64_t num_files = _files.size();
- int64_t cache_size = _cur_cache_size;
- int64_t index_queue_size = _index_queue.get_elements_num(cache_lock);
- int64_t normal_queue_size = _normal_queue.get_elements_num(cache_lock);
- int64_t disposible_queue_size =
_disposable_queue.get_elements_num(cache_lock);
- int64_t ttl_queue_size = _ttl_queue.get_elements_num(cache_lock);
-
- int64_t clear_fd_duration = 0;
+ std::string result;
{
- // clear FDCache to release fd
- SCOPED_RAW_TIMER(&clear_fd_duration);
- for (const auto& [file_key, file_blocks] : _files) {
- for (const auto& [offset, file_block_cell] : file_blocks) {
- AccessKeyAndOffset access_key_and_offset(file_key, offset);
- FDCache::instance()->remove_file_reader(access_key_and_offset);
+ SCOPED_CACHE_LOCK(_mutex, this);
+ LOG_INFO("start clear_file_cache_directly").tag("path",
_cache_base_path);
+
+ std::string clear_msg;
+ auto s = _storage->clear(clear_msg);
+ if (!s.ok()) {
+ result = clear_msg;
+ } else {
+ int64_t num_files = _files.size();
+ int64_t cache_size = _cur_cache_size;
+ int64_t index_queue_size =
_index_queue.get_elements_num(cache_lock);
+ int64_t normal_queue_size =
_normal_queue.get_elements_num(cache_lock);
+ int64_t disposible_queue_size =
_disposable_queue.get_elements_num(cache_lock);
+ int64_t ttl_queue_size = _ttl_queue.get_elements_num(cache_lock);
+
+ int64_t clear_fd_duration = 0;
+ {
+ // clear FDCache to release fd
+ SCOPED_RAW_TIMER(&clear_fd_duration);
+ for (const auto& [file_key, file_blocks] : _files) {
+ for (const auto& [offset, file_block_cell] : file_blocks) {
+ AccessKeyAndOffset access_key_and_offset(file_key,
offset);
+
FDCache::instance()->remove_file_reader(access_key_and_offset);
+ }
+ }
}
+
+ _files.clear();
+ _cur_cache_size = 0;
+ _cur_ttl_size = 0;
+ _time_to_key.clear();
+ _key_to_time.clear();
+ _index_queue.clear(cache_lock);
+ _normal_queue.clear(cache_lock);
+ _disposable_queue.clear(cache_lock);
+ _ttl_queue.clear(cache_lock);
+
+ // Update cache metrics immediately so consumers observe the
cleared state
+ // without waiting for the next background monitor round.
+ _cur_cache_size_metrics->set_value(0);
+ _cur_ttl_cache_size_metrics->set_value(0);
+ _cur_ttl_cache_lru_queue_cache_size_metrics->set_value(0);
+ _cur_ttl_cache_lru_queue_element_count_metrics->set_value(0);
+ _cur_normal_queue_cache_size_metrics->set_value(0);
+ _cur_normal_queue_element_count_metrics->set_value(0);
+ _cur_index_queue_cache_size_metrics->set_value(0);
+ _cur_index_queue_element_count_metrics->set_value(0);
+ _cur_disposable_queue_cache_size_metrics->set_value(0);
+ _cur_disposable_queue_element_count_metrics->set_value(0);
+
+ clear_need_update_lru_blocks();
+
+ ss << "finish clear_file_cache_directly"
+ << " path=" << _cache_base_path << " time_elapsed_ms="
+ << duration_cast<milliseconds>(steady_clock::now() -
start).count()
+ << " fd_clear_time_ms=" << (clear_fd_duration / 1000000)
+ << " num_files=" << num_files << " cache_size=" << cache_size
+ << " index_queue_size=" << index_queue_size
+ << " normal_queue_size=" << normal_queue_size
+ << " disposible_queue_size=" << disposible_queue_size
+ << "ttl_queue_size=" << ttl_queue_size;
+ result = ss.str();
+ LOG(INFO) << result;
}
}
-
- _files.clear();
- _cur_cache_size = 0;
- _cur_ttl_size = 0;
- _time_to_key.clear();
- _key_to_time.clear();
- _index_queue.clear(cache_lock);
- _normal_queue.clear(cache_lock);
- _disposable_queue.clear(cache_lock);
- _ttl_queue.clear(cache_lock);
-
- // Synchronously update cache metrics so that
information_schema.file_cache_statistics
- // reflects the cleared state immediately, without waiting for the next
- // run_background_monitor cycle.
- _cur_cache_size_metrics->set_value(0);
- _cur_ttl_cache_size_metrics->set_value(0);
- _cur_ttl_cache_lru_queue_cache_size_metrics->set_value(0);
- _cur_ttl_cache_lru_queue_element_count_metrics->set_value(0);
- _cur_normal_queue_cache_size_metrics->set_value(0);
- _cur_normal_queue_element_count_metrics->set_value(0);
- _cur_index_queue_cache_size_metrics->set_value(0);
- _cur_index_queue_element_count_metrics->set_value(0);
- _cur_disposable_queue_cache_size_metrics->set_value(0);
- _cur_disposable_queue_element_count_metrics->set_value(0);
-
- clear_need_update_lru_blocks();
-
- ss << "finish clear_file_cache_directly"
- << " path=" << _cache_base_path
- << " time_elapsed_ms=" <<
duration_cast<milliseconds>(steady_clock::now() - start).count()
- << " fd_clear_time_ms=" << (clear_fd_duration / 1000000) << "
num_files=" << num_files
- << " cache_size=" << cache_size << " index_queue_size=" <<
index_queue_size
- << " normal_queue_size=" << normal_queue_size
- << " disposible_queue_size=" << disposible_queue_size <<
"ttl_queue_size=" << ttl_queue_size;
- auto msg = ss.str();
- LOG(INFO) << msg;
_lru_dumper->remove_lru_dump_files();
- return msg;
+ resume_ttl_manager();
+ return result;
}
std::map<size_t, FileBlockSPtr> BlockFileCache::get_blocks_by_key(const
UInt128Wrapper& hash) {
diff --git a/be/src/io/cache/block_file_cache.h
b/be/src/io/cache/block_file_cache.h
index e1c64a665bf..4c155ba2d44 100644
--- a/be/src/io/cache/block_file_cache.h
+++ b/be/src/io/cache/block_file_cache.h
@@ -200,6 +200,9 @@ public:
if (_cache_background_block_lru_update_thread.joinable()) {
_cache_background_block_lru_update_thread.join();
}
+ if (_ttl_mgr) {
+ _ttl_mgr.reset();
+ }
}
/// Restore cache from local filesystem.
@@ -307,6 +310,9 @@ public:
void update_ttl_atime(const UInt128Wrapper& hash);
+ void pause_ttl_manager();
+ void resume_ttl_manager();
+
std::map<std::string, double> get_stats();
// for be UTs
diff --git a/be/src/io/cache/block_file_cache_ttl_mgr.cpp
b/be/src/io/cache/block_file_cache_ttl_mgr.cpp
index 7b9e66489cc..939045aa71e 100644
--- a/be/src/io/cache/block_file_cache_ttl_mgr.cpp
+++ b/be/src/io/cache/block_file_cache_ttl_mgr.cpp
@@ -40,16 +40,15 @@ BlockFileCacheTtlMgr::BlockFileCacheTtlMgr(BlockFileCache*
mgr, CacheBlockMetaSt
: _mgr(mgr), _meta_store(meta_store), _stop_background(false) {
_tablet_id_set_size_metrics = std::make_shared<bvar::Status<size_t>>(
_mgr->get_base_path().c_str(),
"file_cache_ttl_mgr_tablet_id_set_size", 0);
- // Start background threads
- _update_ttl_thread =
-
std::thread(&BlockFileCacheTtlMgr::run_backgroud_update_ttl_info_map, this);
- _expiration_check_thread =
- std::thread(&BlockFileCacheTtlMgr::run_backgroud_expiration_check,
this);
- _tablet_id_flush_thread =
- std::thread(&BlockFileCacheTtlMgr::run_background_tablet_id_flush,
this);
+ resume();
}
BlockFileCacheTtlMgr::~BlockFileCacheTtlMgr() {
+ stop();
+}
+
+void BlockFileCacheTtlMgr::stop() {
+ std::lock_guard<std::mutex> lifecycle_lock(_thread_lifecycle_mutex);
_stop_background.store(true, std::memory_order_release);
if (_update_ttl_thread.joinable()) {
@@ -65,6 +64,22 @@ BlockFileCacheTtlMgr::~BlockFileCacheTtlMgr() {
}
}
+void BlockFileCacheTtlMgr::resume() {
+ std::lock_guard<std::mutex> lifecycle_lock(_thread_lifecycle_mutex);
+ if (_update_ttl_thread.joinable() || _expiration_check_thread.joinable() ||
+ _tablet_id_flush_thread.joinable()) {
+ return;
+ }
+
+ _stop_background.store(false, std::memory_order_release);
+ _update_ttl_thread =
+
std::thread(&BlockFileCacheTtlMgr::run_backgroud_update_ttl_info_map, this);
+ _expiration_check_thread =
+ std::thread(&BlockFileCacheTtlMgr::run_backgroud_expiration_check,
this);
+ _tablet_id_flush_thread =
+ std::thread(&BlockFileCacheTtlMgr::run_background_tablet_id_flush,
this);
+}
+
void BlockFileCacheTtlMgr::register_tablet_id(int64_t tablet_id) {
_tablet_id_queue.enqueue(tablet_id);
}
@@ -292,4 +307,4 @@ void BlockFileCacheTtlMgr::run_backgroud_expiration_check()
{
}
}
-} // namespace doris::io
\ No newline at end of file
+} // namespace doris::io
diff --git a/be/src/io/cache/block_file_cache_ttl_mgr.h
b/be/src/io/cache/block_file_cache_ttl_mgr.h
index b16b5486b4b..8cd677446f1 100644
--- a/be/src/io/cache/block_file_cache_ttl_mgr.h
+++ b/be/src/io/cache/block_file_cache_ttl_mgr.h
@@ -48,6 +48,8 @@ public:
~BlockFileCacheTtlMgr();
void register_tablet_id(int64_t tablet_id);
+ void stop();
+ void resume();
// Background thread to update ttl_info_map
void run_backgroud_update_ttl_info_map();
@@ -73,10 +75,11 @@ private:
std::thread _update_ttl_thread;
std::thread _expiration_check_thread;
std::thread _tablet_id_flush_thread;
+ std::mutex _thread_lifecycle_mutex;
std::mutex _ttl_info_mutex;
std::shared_ptr<bvar::Status<size_t>> _tablet_id_set_size_metrics;
};
-} // namespace doris::io
\ No newline at end of file
+} // namespace doris::io
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]