freemandealer commented on code in PR #43440: URL: https://github.com/apache/doris/pull/43440#discussion_r1833921121
########## be/src/http/action/file_cache_action.cpp: ########## @@ -110,6 +120,16 @@ Status FileCacheAction::_handle_header(HttpRequest* req, std::string* json_metri json[HASH.data()] = ret.to_string(); *json_metrics = json.ToString(); } + } else if (operation == VIEW) { + auto all_cache_base_path = io::FileCacheFactory::instance()->get_base_paths(); + auto json_array = get_json_array(all_cache_base_path); + *json_metrics = json_array.ToString(); + } else if (operation == CHECK) { + const std::string& cache_base_path = req->param(BASE_PATH.data()); + auto* block_file_cache = io::FileCacheFactory::instance()->get_by_path(cache_base_path); Review Comment: handle the error if the path is invalid. otherwise this will crash into nullptr. ########## be/src/io/cache/block_file_cache.cpp: ########## @@ -2084,4 +2084,28 @@ std::map<std::string, double> BlockFileCache::get_stats_unsafe() { template void BlockFileCache::remove(FileBlockSPtr file_block, std::lock_guard<std::mutex>& cache_lock, std::lock_guard<std::mutex>& block_lock, bool sync); + +std::vector<std::string> BlockFileCache::check_file_cache_consistency() { + std::unordered_set<AccessKeyAndOffset, KeyAndOffsetHash> confirmed_blocks; + std::vector<InconsistentCacheContext> inconsistent_cache_context; + std::lock_guard<std::mutex> lock(_mutex); Review Comment: naming advice: lock -> cache_lock for efficient code search. ########## be/src/io/cache/file_cache_common.h: ########## @@ -134,4 +137,51 @@ struct CacheContext { bool is_cold_data {false}; }; +struct InconsistentCacheContext { + UInt128Wrapper hash; + int64_t expiration_time; + size_t offset; + FileCacheType cache_type; + class InconsistentType { + uint32_t type; + + public: + enum : uint32_t { + NONE = 0, + FILES_INCONSISTENT = 1 << 0, Review Comment: the description for each type is a necessary here ########## be/src/io/cache/file_cache_common.h: ########## @@ -134,4 +137,51 @@ struct CacheContext { bool is_cold_data {false}; }; +struct InconsistentCacheContext { + UInt128Wrapper hash; + int64_t expiration_time; + size_t offset; + FileCacheType cache_type; + class InconsistentType { + uint32_t type; + + public: + enum : uint32_t { + NONE = 0, + FILES_INCONSISTENT = 1 << 0, + STORAGE_INCONSISTENT = 1 << 1, + SIZE_INCONSISTENT = 1 << 2, + CACHE_TYPE_INCONSISTENT = 1 << 3, Review Comment: ditto, may be 'NOT_LOADED'? ########## be/src/http/action/file_cache_action.cpp: ########## @@ -50,6 +50,8 @@ constexpr static std::string_view CLEAR = "clear"; constexpr static std::string_view RESET = "reset"; constexpr static std::string_view HASH = "hash"; constexpr static std::string_view LIST_CACHE = "list_cache"; +constexpr static std::string_view VIEW = "view"; Review Comment: list_base_paths might be more explicit ########## be/src/io/cache/file_cache_common.cpp: ########## @@ -86,4 +101,14 @@ FileBlocksHolderPtr FileCacheAllocatorBuilder::allocate_cache_holder(size_t offs return std::make_unique<FileBlocksHolder>(std::move(holder)); } +std::string InconsistentCacheContext::to_string() const { + std::stringstream ss; + ss << "InconsistentCacheContext:\n" + << "Hash: " << hash.to_string() << "\n" + << "Expiration Time: " << expiration_time << "\n" + << "Offset: " << offset << "\n" + << "Cache Type: " << file_cache_type_to_string(cache_type) << "\n" + << "Inconsistent Type: " << inconsistent_type.to_string(); Review Comment: "Reason:" ########## be/src/io/cache/file_cache_common.h: ########## @@ -134,4 +137,51 @@ struct CacheContext { bool is_cold_data {false}; }; +struct InconsistentCacheContext { + UInt128Wrapper hash; + int64_t expiration_time; + size_t offset; + FileCacheType cache_type; + class InconsistentType { + uint32_t type; + + public: + enum : uint32_t { Review Comment: very compact, that's nice... ########## be/src/io/cache/file_cache_common.h: ########## @@ -134,4 +137,51 @@ struct CacheContext { bool is_cold_data {false}; }; +struct InconsistentCacheContext { + UInt128Wrapper hash; + int64_t expiration_time; + size_t offset; + FileCacheType cache_type; + class InconsistentType { + uint32_t type; + + public: + enum : uint32_t { + NONE = 0, + FILES_INCONSISTENT = 1 << 0, + STORAGE_INCONSISTENT = 1 << 1, Review Comment: "INCONSISTENT" is bidirectional. Might 'MISSING_IN_STORAGE' be more explicit? ########## be/src/io/cache/fs_file_cache_storage.cpp: ########## @@ -588,6 +588,114 @@ void FSFileCacheStorage::load_cache_info_into_memory(BlockFileCache* _mgr) const TEST_SYNC_POINT_CALLBACK("BlockFileCache::TmpFile2"); } +void FSFileCacheStorage::check_consistency( + BlockFileCache* _mgr, + std::unordered_set<AccessKeyAndOffset, KeyAndOffsetHash>& confirmed_blocks, + std::vector<InconsistentCacheContext>& inconsistent_cache_context, + std::lock_guard<std::mutex>& cache_lock) const { + std::error_code ec; + auto check = [_mgr, &cache_lock, &confirmed_blocks, &inconsistent_cache_context, + this](std::filesystem::directory_iterator key_it) { + for (; key_it != std::filesystem::directory_iterator(); ++key_it) { + auto key_with_suffix = key_it->path().filename().native(); + auto delim_pos = key_with_suffix.find('_'); + DCHECK(delim_pos != std::string::npos); + std::string key_str = key_with_suffix.substr(0, delim_pos); + std::string expiration_time_str = key_with_suffix.substr(delim_pos + 1); + auto hash = UInt128Wrapper(vectorized::unhex_uint<uint128_t>(key_str.c_str())); + std::error_code ec; + std::filesystem::directory_iterator offset_it(key_it->path(), ec); + if (ec) [[unlikely]] { + LOG(WARNING) << "filesystem error, failed to remove file, file=" << key_it->path() + << " error=" << ec.message(); + continue; + } + long expiration_time = std::stoul(expiration_time_str); + for (; offset_it != std::filesystem::directory_iterator(); ++offset_it) { + size_t size = offset_it->file_size(ec); + size_t offset = 0; + bool is_tmp = false; + FileCacheType cache_type = FileCacheType::NORMAL; + if (!this->parse_filename_suffix_to_cache_type( + fs, offset_it->path().filename().native(), expiration_time, size, + &offset, &is_tmp, &cache_type)) { Review Comment: if is_tmp == true, the type will never be assigned (always be the default value `FileCacheType::NORMAL`). check the internal of `parse_filename_suffix_to_cache_type` for details. maybe add `TMP` as another 'type' in `file_cache_type_to_string(be/src/io/cache/file_cache_common.cpp )`. As I see it, there is no counterpart in management structure in memory. Maybe we can deal with TMP like this: if the state is 'DOWNLOADING', it's consistent. otherwise, inconsistent. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org