gavinchou commented on code in PR #50726: URL: https://github.com/apache/doris/pull/50726#discussion_r2099588169
########## be/src/io/cache/fs_file_cache_storage.cpp: ########## @@ -307,6 +328,98 @@ std::string FSFileCacheStorage::get_path_in_local_cache(const UInt128Wrapper& va } } +void FSFileCacheStorage::remove_old_version_directories() { + std::error_code ec; + std::filesystem::directory_iterator key_it {_cache_base_path, ec}; + if (ec) { + LOG(WARNING) << "Failed to list directory: " << _cache_base_path + << ", error: " << ec.message(); + return; + } + + std::vector<std::filesystem::path> file_list; + // the dir is concurrently accessed, so handle invalid iter with retry + bool success = false; + size_t retry_count = 0; + const size_t max_retry = 30; + while (!success && retry_count < max_retry) { + try { + ++retry_count; + for (; key_it != std::filesystem::directory_iterator(); ++key_it) { + file_list.push_back(key_it->path()); + } + success = true; + } catch (const std::exception& e) { + LOG(WARNING) << "Error occurred while iterating directory: " << e.what(); + file_list.clear(); + } + } + + if (!success) { + LOG_WARNING("iteration of cache dir still failed after retry {} times.", max_retry); + } + + auto path_itr = file_list.begin(); + for (; path_itr != file_list.end(); ++path_itr) { + if (std::filesystem::is_directory(*path_itr)) { + std::string cache_key = path_itr->filename().native(); + if (cache_key.size() > KEY_PREFIX_LENGTH) { + // try our best to delete, not care the return + (void)fs->delete_directory(*path_itr); + } + } + } + auto s = fs->delete_file(get_version_path()); + if (!s.ok()) { + LOG(WARNING) << "deleted old version file failed: " << s.to_string(); + return; + } + s = write_file_cache_version(); + if (!s.ok()) { + LOG(WARNING) << "write new version file failed: " << s.to_string(); + return; + } +} + +Status FSFileCacheStorage::collect_directory_entries(const std::filesystem::path& dir_path, + std::vector<std::string>& file_list) const { + std::error_code ec; + bool success = false; + size_t retry_count = 0; + const size_t max_retry = 5; + + 
while (!success && retry_count < max_retry) { + try { + ++retry_count; + std::filesystem::directory_iterator it {dir_path, ec}; + TEST_SYNC_POINT_CALLBACK("FSFileCacheStorage::collect_directory_entries"); + if (ec) { + LOG(WARNING) << "Failed to list directory: " << dir_path + << ", error: " << ec.message(); + return Status::InternalError("Failed to list dir {}: {}", dir_path.native(), + ec.message()); + } + + file_list.clear(); + for (; it != std::filesystem::directory_iterator(); ++it) { + file_list.push_back(it->path().string()); + } + success = true; + } catch (const std::exception& e) { + LOG(WARNING) << "Error occurred while iterating directory: " << dir_path + << " err: " << e.what(); + file_list.clear(); + } + } + + if (!success) { + LOG_WARNING("iteration of cache dir still failed after retry {} times.", max_retry); Review Comment: log stats -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org