freemandealer commented on code in PR #43440: URL: https://github.com/apache/doris/pull/43440#discussion_r1834020476
########## be/src/io/cache/fs_file_cache_storage.cpp: ########## @@ -588,6 +588,114 @@ void FSFileCacheStorage::load_cache_info_into_memory(BlockFileCache* _mgr) const TEST_SYNC_POINT_CALLBACK("BlockFileCache::TmpFile2"); } +void FSFileCacheStorage::check_consistency( + BlockFileCache* _mgr, + std::unordered_set<AccessKeyAndOffset, KeyAndOffsetHash>& confirmed_blocks, + std::vector<InconsistentCacheContext>& inconsistent_cache_context, + std::lock_guard<std::mutex>& cache_lock) const { + std::error_code ec; + auto check = [_mgr, &cache_lock, &confirmed_blocks, &inconsistent_cache_context, + this](std::filesystem::directory_iterator key_it) { + for (; key_it != std::filesystem::directory_iterator(); ++key_it) { + auto key_with_suffix = key_it->path().filename().native(); + auto delim_pos = key_with_suffix.find('_'); + DCHECK(delim_pos != std::string::npos); + std::string key_str = key_with_suffix.substr(0, delim_pos); + std::string expiration_time_str = key_with_suffix.substr(delim_pos + 1); + auto hash = UInt128Wrapper(vectorized::unhex_uint<uint128_t>(key_str.c_str())); + std::error_code ec; + std::filesystem::directory_iterator offset_it(key_it->path(), ec); + if (ec) [[unlikely]] { + LOG(WARNING) << "filesystem error, failed to remove file, file=" << key_it->path() + << " error=" << ec.message(); + continue; + } + long expiration_time = std::stoul(expiration_time_str); + for (; offset_it != std::filesystem::directory_iterator(); ++offset_it) { + size_t size = offset_it->file_size(ec); + size_t offset = 0; + bool is_tmp = false; + FileCacheType cache_type = FileCacheType::NORMAL; + if (!this->parse_filename_suffix_to_cache_type( + fs, offset_it->path().filename().native(), expiration_time, size, + &offset, &is_tmp, &cache_type)) { Review Comment: if is_tmp == true, the type will never be assigned (always be the default value `FileCacheType::NORMAL`). check the internal of `parse_filename_suffix_to_cache_type` for details. maybe add `TMP` as another 'type' in `file_cache_type_to_string(be/src/io/cache/file_cache_common.cpp )`. As I see it, there is no counterpart in management structure in memory. Maybe we can deal with TMP like this: if a tmp file with state 'DOWNLOADING', it's consistent. otherwise, inconsistent. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org