cambyzju commented on code in PR #15114: URL: https://github.com/apache/doris/pull/15114#discussion_r1052102987
########## be/src/io/cache/file_cache.cpp: ########## @@ -87,32 +89,103 @@ Status FileCache::download_cache_to_local(const Path& cache_file, const Path& ca return Status::OK(); } -Status FileCache::_remove_file(const Path& cache_file, const Path& cache_done_file, - size_t* cleaned_size) { - bool done_file_exist = false; - RETURN_NOT_OK_STATUS_WITH_WARN( - io::global_local_filesystem()->exists(cache_done_file, &done_file_exist), - "Check local done file exist failed."); - if (done_file_exist) { - RETURN_NOT_OK_STATUS_WITH_WARN( - io::global_local_filesystem()->delete_file(cache_done_file), - fmt::format("Delete local done file failed: {}", cache_done_file.native())); - } +Status FileCache::_remove_file(const Path& file, size_t* cleaned_size) { bool cache_file_exist = false; - RETURN_NOT_OK_STATUS_WITH_WARN( - io::global_local_filesystem()->exists(cache_file, &cache_file_exist), - "Check local cache file exist failed."); + RETURN_NOT_OK_STATUS_WITH_WARN(io::global_local_filesystem()->exists(file, &cache_file_exist), + "Check local cache file exist failed."); if (cache_file_exist) { if (cleaned_size) { RETURN_NOT_OK_STATUS_WITH_WARN( - io::global_local_filesystem()->file_size(cache_file, cleaned_size), - fmt::format("get local cache file size failed: {}", cache_file.native())); + io::global_local_filesystem()->file_size(file, cleaned_size), + fmt::format("get local cache file size failed: {}", file.native())); } RETURN_NOT_OK_STATUS_WITH_WARN( - io::global_local_filesystem()->delete_file(cache_file), - fmt::format("Delete local cache file failed: {}", cache_file.native())); + io::global_local_filesystem()->delete_file(file), + fmt::format("Delete local cache file failed: {}", file.native())); + } + LOG(INFO) << "Delete local cache file successfully: " << file.native(); + return Status::OK(); +} + +Status FileCache::_remove_cache_and_done(const Path& cache_file, const Path& cache_done_file, + size_t* cleaned_size) { + RETURN_IF_ERROR(_remove_file(cache_done_file, nullptr)); + RETURN_IF_ERROR(_remove_file(cache_file, cleaned_size)); + return Status::OK(); +} + +Status FileCache::_get_dir_file_and_remove_unfinished(const Path& cache_dir, + std::vector<Path>& cache_names) { + bool cache_dir_exist = true; + RETURN_NOT_OK_STATUS_WITH_WARN( + io::global_local_filesystem()->exists(cache_dir, &cache_dir_exist), + fmt::format("Check local cache dir exist failed. {}", cache_dir.native())); + if (!cache_dir_exist) { + return Status::OK(); + } + + // list all files + std::vector<Path> cache_file_names; + RETURN_NOT_OK_STATUS_WITH_WARN( + io::global_local_filesystem()->list(cache_dir, &cache_file_names), + fmt::format("List dir failed: {}", cache_dir.native())) + + // separate DATA file and DONE file + std::set<Path> cache_names_temp; + std::list<Path> done_names_temp; + for (auto& cache_file_name : cache_file_names) { + if (ends_with(cache_file_name.native(), CACHE_DONE_FILE_SUFFIX)) { + done_names_temp.push_back(std::move(cache_file_name)); + } else { + cache_names_temp.insert(std::move(cache_file_name)); + } + } + + // match DONE file with DATA file + for (auto done_file : done_names_temp) { + Path cache_filename = StringReplace(done_file.native(), CACHE_DONE_FILE_SUFFIX, "", true); + if (auto cache_iter = cache_names_temp.find(cache_filename); + cache_iter != cache_names_temp.end()) { + cache_names_temp.erase(cache_iter); + cache_names.push_back(std::move(cache_filename)); + } else { + // not data file, but with DONE file + RETURN_IF_ERROR(_remove_file(done_file, nullptr)); + } + } + // data file without DONE file + for (auto& file : cache_names_temp) { + RETURN_IF_ERROR(_remove_file(file, nullptr)); + } + return Status::OK(); +} + +Status FileCache::_clean_unfinished_files(const std::vector<Path>& unfinished_files) { + // remove cache file without done file + for (auto file : unfinished_files) { Review Comment: ```suggestion for (const auto& file : unfinished_files) { ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org