freemandealer commented on code in PR #43440:
URL: https://github.com/apache/doris/pull/43440#discussion_r1833921121


##########
be/src/http/action/file_cache_action.cpp:
##########
@@ -110,6 +120,16 @@ Status FileCacheAction::_handle_header(HttpRequest* req, 
std::string* json_metri
             json[HASH.data()] = ret.to_string();
             *json_metrics = json.ToString();
         }
+    } else if (operation == VIEW) {
+        auto all_cache_base_path = 
io::FileCacheFactory::instance()->get_base_paths();
+        auto json_array = get_json_array(all_cache_base_path);
+        *json_metrics = json_array.ToString();
+    } else if (operation == CHECK) {
+        const std::string& cache_base_path = req->param(BASE_PATH.data());
+        auto* block_file_cache = 
io::FileCacheFactory::instance()->get_by_path(cache_base_path);

Review Comment:
   handle the error if the path is invalid. otherwise this will crash into 
nullptr.



##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -2084,4 +2084,28 @@ std::map<std::string, double> 
BlockFileCache::get_stats_unsafe() {
 template void BlockFileCache::remove(FileBlockSPtr file_block,
                                      std::lock_guard<std::mutex>& cache_lock,
                                      std::lock_guard<std::mutex>& block_lock, 
bool sync);
+
+std::vector<std::string> BlockFileCache::check_file_cache_consistency() {
+    std::unordered_set<AccessKeyAndOffset, KeyAndOffsetHash> confirmed_blocks;
+    std::vector<InconsistentCacheContext> inconsistent_cache_context;
+    std::lock_guard<std::mutex> lock(_mutex);

Review Comment:
   naming advice: lock -> cache_lock  for efficient code search.



##########
be/src/io/cache/file_cache_common.h:
##########
@@ -134,4 +137,51 @@ struct CacheContext {
     bool is_cold_data {false};
 };
 
+struct InconsistentCacheContext {
+    UInt128Wrapper hash;
+    int64_t expiration_time;
+    size_t offset;
+    FileCacheType cache_type;
+    class InconsistentType {
+        uint32_t type;
+
+    public:
+        enum : uint32_t {
+            NONE = 0,
+            FILES_INCONSISTENT = 1 << 0,

Review Comment:
   the description for each type is a necessary here



##########
be/src/io/cache/file_cache_common.h:
##########
@@ -134,4 +137,51 @@ struct CacheContext {
     bool is_cold_data {false};
 };
 
+struct InconsistentCacheContext {
+    UInt128Wrapper hash;
+    int64_t expiration_time;
+    size_t offset;
+    FileCacheType cache_type;
+    class InconsistentType {
+        uint32_t type;
+
+    public:
+        enum : uint32_t {
+            NONE = 0,
+            FILES_INCONSISTENT = 1 << 0,
+            STORAGE_INCONSISTENT = 1 << 1,
+            SIZE_INCONSISTENT = 1 << 2,
+            CACHE_TYPE_INCONSISTENT = 1 << 3,

Review Comment:
   ditto, may be 'NOT_LOADED'?



##########
be/src/http/action/file_cache_action.cpp:
##########
@@ -50,6 +50,8 @@ constexpr static std::string_view CLEAR = "clear";
 constexpr static std::string_view RESET = "reset";
 constexpr static std::string_view HASH = "hash";
 constexpr static std::string_view LIST_CACHE = "list_cache";
+constexpr static std::string_view VIEW = "view";

Review Comment:
   list_base_paths might be more explicit



##########
be/src/io/cache/file_cache_common.cpp:
##########
@@ -86,4 +101,14 @@ FileBlocksHolderPtr 
FileCacheAllocatorBuilder::allocate_cache_holder(size_t offs
     return std::make_unique<FileBlocksHolder>(std::move(holder));
 }
 
+std::string InconsistentCacheContext::to_string() const {
+    std::stringstream ss;
+    ss << "InconsistentCacheContext:\n"
+       << "Hash: " << hash.to_string() << "\n"
+       << "Expiration Time: " << expiration_time << "\n"
+       << "Offset: " << offset << "\n"
+       << "Cache Type: " << file_cache_type_to_string(cache_type) << "\n"
+       << "Inconsistent Type: " << inconsistent_type.to_string();

Review Comment:
   "Reason:"



##########
be/src/io/cache/file_cache_common.h:
##########
@@ -134,4 +137,51 @@ struct CacheContext {
     bool is_cold_data {false};
 };
 
+struct InconsistentCacheContext {
+    UInt128Wrapper hash;
+    int64_t expiration_time;
+    size_t offset;
+    FileCacheType cache_type;
+    class InconsistentType {
+        uint32_t type;
+
+    public:
+        enum : uint32_t {

Review Comment:
   very compact, that's nice...



##########
be/src/io/cache/file_cache_common.h:
##########
@@ -134,4 +137,51 @@ struct CacheContext {
     bool is_cold_data {false};
 };
 
+struct InconsistentCacheContext {
+    UInt128Wrapper hash;
+    int64_t expiration_time;
+    size_t offset;
+    FileCacheType cache_type;
+    class InconsistentType {
+        uint32_t type;
+
+    public:
+        enum : uint32_t {
+            NONE = 0,
+            FILES_INCONSISTENT = 1 << 0,
+            STORAGE_INCONSISTENT = 1 << 1,

Review Comment:
   "INCONSISTENT" is bidirectional. Might 'MISSING_IN_STORAGE' be more explicit?



##########
be/src/io/cache/fs_file_cache_storage.cpp:
##########
@@ -588,6 +588,114 @@ void 
FSFileCacheStorage::load_cache_info_into_memory(BlockFileCache* _mgr) const
     TEST_SYNC_POINT_CALLBACK("BlockFileCache::TmpFile2");
 }
 
+void FSFileCacheStorage::check_consistency(
+        BlockFileCache* _mgr,
+        std::unordered_set<AccessKeyAndOffset, KeyAndOffsetHash>& 
confirmed_blocks,
+        std::vector<InconsistentCacheContext>& inconsistent_cache_context,
+        std::lock_guard<std::mutex>& cache_lock) const {
+    std::error_code ec;
+    auto check = [_mgr, &cache_lock, &confirmed_blocks, 
&inconsistent_cache_context,
+                  this](std::filesystem::directory_iterator key_it) {
+        for (; key_it != std::filesystem::directory_iterator(); ++key_it) {
+            auto key_with_suffix = key_it->path().filename().native();
+            auto delim_pos = key_with_suffix.find('_');
+            DCHECK(delim_pos != std::string::npos);
+            std::string key_str = key_with_suffix.substr(0, delim_pos);
+            std::string expiration_time_str = key_with_suffix.substr(delim_pos 
+ 1);
+            auto hash = 
UInt128Wrapper(vectorized::unhex_uint<uint128_t>(key_str.c_str()));
+            std::error_code ec;
+            std::filesystem::directory_iterator offset_it(key_it->path(), ec);
+            if (ec) [[unlikely]] {
+                LOG(WARNING) << "filesystem error, failed to remove file, 
file=" << key_it->path()
+                             << " error=" << ec.message();
+                continue;
+            }
+            long expiration_time = std::stoul(expiration_time_str);
+            for (; offset_it != std::filesystem::directory_iterator(); 
++offset_it) {
+                size_t size = offset_it->file_size(ec);
+                size_t offset = 0;
+                bool is_tmp = false;
+                FileCacheType cache_type = FileCacheType::NORMAL;
+                if (!this->parse_filename_suffix_to_cache_type(
+                            fs, offset_it->path().filename().native(), 
expiration_time, size,
+                            &offset, &is_tmp, &cache_type)) {

Review Comment:
   if is_tmp == true, the type will never be assigned (always be the default 
value `FileCacheType::NORMAL`). check the internal of 
`parse_filename_suffix_to_cache_type` for details. 
   
   maybe add `TMP` as another 'type' in 
`file_cache_type_to_string(be/src/io/cache/file_cache_common.cpp )`.  As I see 
it, there is no counterpart in management structure in memory. Maybe we can 
deal with TMP like this:
   if the state is 'DOWNLOADING', it's consistent. otherwise, inconsistent.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to