wenzhenghu commented on code in PR #55772:
URL: https://github.com/apache/doris/pull/55772#discussion_r2386484331


##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -1018,73 +1035,19 @@ bool BlockFileCache::try_reserve(const UInt128Wrapper& hash, const CacheContext&
     int64_t cur_time = std::chrono::duration_cast<std::chrono::seconds>(
                                std::chrono::steady_clock::now().time_since_epoch())
                                .count();
-    auto& queue = get_queue(context.cache_type);
-    size_t removed_size = 0;
-    size_t ghost_remove_size = 0;
-    size_t queue_size = queue.get_capacity(cache_lock);
-    size_t cur_cache_size = _cur_cache_size;
-    size_t query_context_cache_size = query_context->get_cache_size(cache_lock);
-
-    std::vector<LRUQueue::Iterator> ghost;
-    std::vector<FileBlockCell*> to_evict;
-
-    size_t max_size = queue.get_max_size();
-    auto is_overflow = [&] {
-        return _disk_resource_limit_mode ? removed_size < size
-                                         : cur_cache_size + size - removed_size > _capacity ||
-                                                   (queue_size + size - removed_size > max_size) ||
-                                                   (query_context_cache_size + size -
-                                                            (removed_size + ghost_remove_size) >
-                                                    query_context->get_max_cache_size());
-    };
-
-    /// Select the cache from the LRU queue held by query for expulsion.
-    for (auto iter = query_context->queue().begin(); iter != query_context->queue().end(); iter++) {
-        if (!is_overflow()) {
-            break;
-        }
-
-        auto* cell = get_cell(iter->hash, iter->offset, cache_lock);
 
-        if (!cell) {
-            /// The cache corresponding to this record may be swapped out by
-            /// other queries, so it has become invalid.
-            ghost.push_back(iter);
-            ghost_remove_size += iter->size;
-        } else {
-            size_t cell_size = cell->size();
-            DCHECK(iter->size == cell_size);
-
-            if (cell->releasable()) {
-                auto& file_block = cell->file_block;
-
-                std::lock_guard block_lock(file_block->_mutex);
-                DCHECK(file_block->_download_state == FileBlock::State::DOWNLOADED);
-                to_evict.push_back(cell);
-                removed_size += cell_size;
-            }
-        }
-    }
-
-    auto remove_file_block_if = [&](FileBlockCell* cell) {
-        FileBlockSPtr file_block = cell->file_block;
-        if (file_block) {
-            query_context->remove(file_block->get_hash_value(), file_block->offset(), cache_lock);
-            std::lock_guard block_lock(file_block->_mutex);
-            remove(file_block, cache_lock, block_lock);
-        }
-    };
-
-    for (auto& iter : ghost) {
-        query_context->remove(iter->hash, iter->offset, cache_lock);
-    }
-
-    std::for_each(to_evict.begin(), to_evict.end(), remove_file_block_if);
-
-    if (is_overflow() &&
-        !try_reserve_from_other_queue(context.cache_type, size, cur_time, cache_lock)) {
+    bool other_queue_success =
+            config::file_cache_query_limit_enable_evict_from_other_queue
+                    ? try_reserve_from_other_queue(context.cache_type, size, cur_time, cache_lock)
+                    : false;
+    if (!other_queue_success) {
+        LOG(WARNING) << "Failed to reserve space after exhausting all eviction strategies: "
+                     << "query_limit_enable_evict_from_other_queue="
+                     << config::file_cache_query_limit_enable_evict_from_other_queue
+                     << ", hash=" << hash.to_string() << ", offset=" << offset << ", size=" << size;
         return false;

Review Comment:
   > In other words, the code deleted above here seems worth keeping.
   
   Zhengyu, this code was kept at first; we removed it only after running some tests, mainly for performance reasons:
   If eviction is allowed inside the query's own queue, and the limit is much smaller than the data the query needs to read (say, 3x smaller), then once the limit is exceeded every query IO goes through query-queue eviction. Blocks that have just entered the queue are swapped out right away, so the query keeps repeating the cache-miss + cache-fill cycle. Compared with a direct remote read, this path adds an extra cache write IO, so it is actually slower than just reading remotely; worse, the cache fill we just paid for is promptly evicted again because the queue has no room. This degrades query performance badly.
   So we changed it to cache only the blocks the query reads at the beginning; once the queue is full, subsequent reads simply become remote reads.
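   
   To make the cost argument concrete, here is a minimal, self-contained sketch (hypothetical numbers, not Doris code; `remote_read_us`, `cache_write_us`, `blocks`, and `limit` are all made-up assumptions) comparing the two policies for a scan whose working set is 3x the query cache limit:
   
   ```cpp
   #include <cstdint>
   #include <iostream>
   
   int main() {
       // Assumed per-block costs in microseconds (illustrative only).
       const int64_t remote_read_us = 1000; // fetch a block from remote storage
       const int64_t cache_write_us = 300;  // extra IO to fill the local cache
   
       const int64_t blocks = 300; // blocks the query reads once
       const int64_t limit = 100;  // query cache limit, 3x smaller than the scan
   
       // Old policy: past the limit, every IO evicts a just-cached block and
       // re-fills, so all 300 reads pay remote read + cache write.
       const int64_t evict_in_query_queue = blocks * (remote_read_us + cache_write_us);
   
       // New policy: cache only the first `limit` blocks, then fall back to plain
       // remote reads; no cache write is wasted on blocks evicted immediately.
       const int64_t cache_prefix_then_remote =
               blocks * remote_read_us + limit * cache_write_us;
   
       std::cout << "evict inside query queue:   " << evict_in_query_queue << " us\n"
                 << "cache prefix + remote read: " << cache_prefix_then_remote << " us\n";
       return 0;
   }
   ```
   
   With these assumed numbers the old policy costs 390000 us versus 330000 us, and the gap grows with the working set: the cache writes past the limit buy no future hits, because those blocks are evicted before they can ever be reused.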



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

