gavinchou commented on code in PR #42451:
URL: https://github.com/apache/doris/pull/42451#discussion_r1825785248


##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -86,6 +86,94 @@ BlockFileCache::BlockFileCache(const std::string& 
cache_base_path,
     _total_evict_size_metrics = std::make_shared<bvar::Adder<size_t>>(
             _cache_base_path.c_str(), "file_cache_total_evict_size");
 
+    
_evict_by_heat_metrics_matrix[FileCacheType::DISPOSABLE][FileCacheType::NORMAL] 
=
+            std::make_shared<bvar::Adder<size_t>>(_cache_base_path.c_str(),
+                                                  
"file_cache_evict_by_heat_disposable_to_normal");

Review Comment:
   "file_cache_evict_by_heat" -> "file_cache_evict_by_time"



##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -86,6 +86,94 @@ BlockFileCache::BlockFileCache(const std::string& 
cache_base_path,
     _total_evict_size_metrics = std::make_shared<bvar::Adder<size_t>>(
             _cache_base_path.c_str(), "file_cache_total_evict_size");
 
+    
_evict_by_heat_metrics_matrix[FileCacheType::DISPOSABLE][FileCacheType::NORMAL] 
=

Review Comment:
   there may be too many metrics if we specify multiple cache_path,
   it may/should be considered.



##########
be/src/io/cache/file_cache_common.h:
##########
@@ -26,17 +26,17 @@ namespace doris::io {
 
 inline static constexpr size_t REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS = 100 
* 1024;
 inline static constexpr size_t FILE_CACHE_MAX_FILE_BLOCK_SIZE = 1 * 1024 * 
1024;
-inline static constexpr size_t DEFAULT_NORMAL_PERCENT = 85;
-inline static constexpr size_t DEFAULT_DISPOSABLE_PERCENT = 10;
+inline static constexpr size_t DEFAULT_NORMAL_PERCENT = 40;

Review Comment:
   do we respect these percentages when upgrade from existing file cache data?
   if yes, there may be cache evict. if no, the starting procedure cannot limit 
each queue size.



##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -109,14 +197,16 @@ BlockFileCache::BlockFileCache(const std::string& 
cache_base_path,
                                                            
"file_cache_hit_ratio_5m", 0.0);
     _hit_ratio_1h = 
std::make_shared<bvar::Status<double>>(_cache_base_path.c_str(),
                                                            
"file_cache_hit_ratio_1h", 0.0);
+    _disk_limit_mode_metrics =
+            std::make_shared<bvar::Status<size_t>>(_cache_base_path.c_str(), 
"disk_limit_mode", 0);

Review Comment:
   "disk_limit_mode" -> "file_cache_disk_limit_mode"



##########
be/src/io/cache/file_cache_common.h:
##########
@@ -26,17 +26,17 @@ namespace doris::io {
 
 inline static constexpr size_t REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS = 100 
* 1024;
 inline static constexpr size_t FILE_CACHE_MAX_FILE_BLOCK_SIZE = 1 * 1024 * 
1024;
-inline static constexpr size_t DEFAULT_NORMAL_PERCENT = 85;
-inline static constexpr size_t DEFAULT_DISPOSABLE_PERCENT = 10;
+inline static constexpr size_t DEFAULT_NORMAL_PERCENT = 40;
+inline static constexpr size_t DEFAULT_DISPOSABLE_PERCENT = 5;
 inline static constexpr size_t DEFAULT_INDEX_PERCENT = 5;
 
 using uint128_t = vectorized::UInt128;
 
-enum class FileCacheType {
-    INDEX,
-    NORMAL,
-    DISPOSABLE,
-    TTL,
+enum FileCacheType {

Review Comment:
   keep enum class, and unwrap the underlying types in block_file_cache.cpp 
when initialize bvars. e.g.
   try
   ```
   consteval underlying_type_t<FileCacheType> foo(FileCacheType t) {
       return t;
   }
   ...
   evcit[foo(INDEX)][foo[NORMAL]]
   evcit[foo(NORMAL)][foo[INDEX]]
   ...
   ```



##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -1203,39 +1313,48 @@ bool 
BlockFileCache::try_reserve_from_other_queue_by_hot_interval(
                 DCHECK(file_block->_download_state == 
FileBlock::State::DOWNLOADED);
                 to_evict.push_back(cell);
                 removed_size += cell_size;
+                remove_size_per_type += cell_size;
             }
         }
+        *(_evict_by_heat_metrics_matrix[cache_type][cur_type]) << 
remove_size_per_type;
     }
     remove_file_blocks(to_evict, cache_lock);
 
     return !is_overflow(removed_size, size, cur_cache_size);
 }
 
-bool BlockFileCache::is_overflow(size_t removed_size, size_t need_size, size_t 
cur_cache_size,
-                                 bool is_ttl) const {
+bool BlockFileCache::is_overflow(size_t removed_size, size_t need_size,
+                                 size_t cur_cache_size) const {
     bool ret = false;
     if (_disk_resource_limit_mode) {
         ret = (removed_size < need_size);
     } else {
         ret = (cur_cache_size + need_size - removed_size > _capacity);
     }
-    if (is_ttl) {
-        size_t ttl_threshold = config::max_ttl_cache_ratio * _capacity / 100;
-        return (ret || ((cur_cache_size + need_size - removed_size) > 
ttl_threshold));
-    }
     return ret;
 }
 
 bool BlockFileCache::try_reserve_from_other_queue_by_size(
-        std::vector<FileCacheType> other_cache_types, size_t size,
+        FileCacheType cur_type, std::vector<FileCacheType> other_cache_types, 
size_t size,
         std::lock_guard<std::mutex>& cache_lock) {
     size_t removed_size = 0;
     size_t cur_cache_size = _cur_cache_size;
     std::vector<FileBlockCell*> to_evict;
+    // we follow the privilege defined in get_other_cache_types to evict

Review Comment:
   try to rename `get_other_cache_types` with more meaningful name e.g. 
`get_other_cache_types_to_evict`



##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -109,14 +197,16 @@ BlockFileCache::BlockFileCache(const std::string& 
cache_base_path,
                                                            
"file_cache_hit_ratio_5m", 0.0);
     _hit_ratio_1h = 
std::make_shared<bvar::Status<double>>(_cache_base_path.c_str(),
                                                            
"file_cache_hit_ratio_1h", 0.0);
+    _disk_limit_mode_metrics =
+            std::make_shared<bvar::Status<size_t>>(_cache_base_path.c_str(), 
"disk_limit_mode", 0);

Review Comment:
   suggest naming `disk_limit_mode` -> `file_cache_disk_space_limited`
   also consider add `file_cache_disk_inode_limited`
   



##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -86,6 +86,94 @@ BlockFileCache::BlockFileCache(const std::string& 
cache_base_path,
     _total_evict_size_metrics = std::make_shared<bvar::Adder<size_t>>(
             _cache_base_path.c_str(), "file_cache_total_evict_size");
 
+    
_evict_by_heat_metrics_matrix[FileCacheType::DISPOSABLE][FileCacheType::NORMAL] 
=
+            std::make_shared<bvar::Adder<size_t>>(_cache_base_path.c_str(),
+                                                  
"file_cache_evict_by_heat_disposable_to_normal");

Review Comment:
   suggest naming "file_cache_evict_by_heat_disposable_to_normal" -> 
"file_cache_evict_by_time_disposable_to_normal"



##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -1579,6 +1700,7 @@ std::string BlockFileCache::reset_capacity(size_t 
new_capacity) {
                 ss << " ttl_queue released " << queue_released;
             }
             _disk_resource_limit_mode = true;
+            _disk_limit_mode_metrics->set_value(1);

Review Comment:
   consider setting it to percentage of utilization of the disk if it enterred 
this mode.
   `_disk_limit_mode_metrics` -> `_disk_size_limit_`
   and we can also monitor the size utilization and the inode untilization 
respectively.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to