gavinchou commented on code in PR #46404: URL: https://github.com/apache/doris/pull/46404#discussion_r1901904553
########## be/src/io/cache/block_file_cache.cpp: ########## @@ -562,7 +561,7 @@ std::string BlockFileCache::clear_file_cache_async() { void BlockFileCache::recycle_deleted_blocks() { Review Comment: A unit test should be added so that this change is fully tested. ########## be/src/io/cache/block_file_cache.cpp: ########## @@ -573,45 +572,34 @@ void BlockFileCache::recycle_deleted_blocks() { int i = 0; std::condition_variable cond; auto start_time = steady_clock::time_point(); - if (_async_clear_file_cache) { - LOG_INFO("Start clear file cache async").tag("path", _cache_base_path); - auto remove_file_block = [&cache_lock, this](FileBlockCell* cell) { - std::lock_guard segment_lock(cell->file_block->_mutex); - remove(cell->file_block, cache_lock, segment_lock); - }; - static int remove_batch = 100; - TEST_SYNC_POINT_CALLBACK("BlockFileCache::set_remove_batch", &remove_batch); - int i = 0; - std::condition_variable cond; - auto iter_queue = [&](LRUQueue& queue) { - bool end = false; - while (queue.get_capacity(cache_lock) != 0 && !end) { - std::vector<FileBlockCell*> cells; - for (const auto& [entry_key, entry_offset, _] : queue) { - if (i == remove_batch) { - i = 0; - break; - } - auto* cell = get_cell(entry_key, entry_offset, cache_lock); - if (!cell) continue; - if (!cell->is_deleted) { - end = true; - break; - } else if (cell->releasable()) { - i++; - cells.push_back(cell); - } + + LOG_INFO("Start clear file cache async").tag("path", _cache_base_path); + auto iter_queue = [&](LRUQueue& queue) { + bool end = false; + while (queue.get_capacity(cache_lock) != 0 && !end) { + std::vector<FileBlockCell*> cells; + for (const auto& [entry_key, entry_offset, _] : queue) { Review Comment: It seems that we should not iterate from the beginning again. There may be a performance penalty if there are lots of running queries while we are deleting elements that are far from the head. Can we iterate from rbegin() to get better performance? 
########## be/src/io/cache/block_file_cache.cpp: ########## @@ -630,14 +618,11 @@ void BlockFileCache::recycle_deleted_blocks() { cell.is_deleted = cell.is_deleted Review Comment: Break this assignment into multiple lines: ``` using namespace std::chrono; // this line can be put before L605 `for (auto&key...` auto now = duration_cast<seconds>(system_clock::now().time_since_epoch()).count(); // this line can be put before L605 `for (auto&key...` bool is_ttl_expired = now - cell.atime > config::file_cache_ttl_valid_check_interval_second; cell.is_deleted = cell.is_deleted || is_ttl_expired; ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org