gavinchou commented on code in PR #50726: URL: https://github.com/apache/doris/pull/50726#discussion_r2096010192
########## be/src/io/cache/fs_file_cache_storage.cpp: ########## @@ -326,91 +442,139 @@ Status FSFileCacheStorage::upgrade_cache_dir_if_necessary() const { std::string version; std::error_code ec; int rename_count = 0; + int failure_count = 0; auto start_time = std::chrono::steady_clock::now(); RETURN_IF_ERROR(read_file_cache_version(&version)); + LOG(INFO) << "Checking cache version upgrade. Current version: " << version << ", target version: 2.0, need upgrade: " << (USE_CACHE_VERSION2 && version != "2.0"); if (USE_CACHE_VERSION2 && version != "2.0") { // move directories format as version 2.0 - std::filesystem::directory_iterator key_it {_cache_base_path, ec}; - if (ec) { - LOG(WARNING) << "Failed to list directory: " << _cache_base_path - << ", error: " << ec.message(); - return Status::InternalError("Failed to list dir {}: {}", _cache_base_path, - ec.message()); - } - for (; key_it != std::filesystem::directory_iterator(); ++key_it) { - if (key_it->is_directory()) { - std::string cache_key = key_it->path().filename().native(); - if (cache_key.size() > KEY_PREFIX_LENGTH) { - std::string key_prefix = - Path(_cache_base_path) / cache_key.substr(0, KEY_PREFIX_LENGTH); - bool exists = false; - auto exists_status = fs->exists(key_prefix, &exists); - if (!exists_status.ok()) { - LOG(WARNING) << "Failed to check directory existence: " << key_prefix - << ", error: " << exists_status.to_string(); - return exists_status; - } - if (!exists) { - auto create_status = fs->create_directory(key_prefix); - if (!create_status.ok()) { - LOG(WARNING) << "Failed to create directory: " << key_prefix - << ", error: " << create_status.to_string(); - return create_status; + std::vector<std::filesystem::directory_iterator> file_list; + RETURN_IF_ERROR(collect_directory_entries(_cache_base_path, file_list)); + + // this directory_iterator should be a problem in concurrent access + for (const auto& key_it : file_list) { + try { + if (key_it->is_directory()) { + std::string cache_key = key_it->path().filename().native(); + if (cache_key.size() > KEY_PREFIX_LENGTH) { + std::string key_prefix = + Path(_cache_base_path) / cache_key.substr(0, KEY_PREFIX_LENGTH); + bool exists = false; + auto exists_status = fs->exists(key_prefix, &exists); + if (!exists_status.ok()) { + LOG(WARNING) << "Failed to check directory existence: " << key_prefix + << ", error: " << exists_status.to_string(); + ++failure_count; + continue; + } + if (!exists) { + auto create_status = fs->create_directory(key_prefix); + if (!create_status.ok() && + create_status.code() != TStatusCode::type::ALREADY_EXIST) { + LOG(WARNING) << "Failed to create directory: " << key_prefix + << ", error: " << create_status.to_string(); + ++failure_count; + continue; + } + } + auto rename_status = fs->rename(key_it->path(), key_prefix / cache_key); + if (rename_status.ok() || + rename_status.code() == TStatusCode::type::DIRECTORY_NOT_EMPTY) { + ++rename_count; + } else { + LOG(WARNING) + << "Failed to rename directory from " << key_it->path().native() + << " to " << (key_prefix / cache_key).native() + << ", error: " << rename_status.to_string(); + ++failure_count; + continue; } - } - auto rename_status = fs->rename(key_it->path(), key_prefix / cache_key); - if (rename_status.ok()) { - ++rename_count; - } else { - LOG(WARNING) - << "Failed to rename directory from " << key_it->path().native() - << " to " << (key_prefix / cache_key).native() - << ", error: " << rename_status.to_string(); - return rename_status; } } + } catch (std::filesystem::filesystem_error& e) { + LOG(WARNING) << "Error occurred while upgrading file cache directory: " + << key_it->path() << " err: " << e.what(); + ++failure_count; } } auto rebuild_dir = [&](std::filesystem::directory_iterator& upgrade_key_it) -> Status { Review Comment: this procedure is unncecssary we can finish renaming and appending _0 simutaneously. ########## be/src/io/cache/fs_file_cache_storage.cpp: ########## @@ -326,91 +442,139 @@ Status FSFileCacheStorage::upgrade_cache_dir_if_necessary() const { std::string version; std::error_code ec; int rename_count = 0; + int failure_count = 0; auto start_time = std::chrono::steady_clock::now(); RETURN_IF_ERROR(read_file_cache_version(&version)); + LOG(INFO) << "Checking cache version upgrade. Current version: " << version << ", target version: 2.0, need upgrade: " << (USE_CACHE_VERSION2 && version != "2.0"); if (USE_CACHE_VERSION2 && version != "2.0") { // move directories format as version 2.0 - std::filesystem::directory_iterator key_it {_cache_base_path, ec}; - if (ec) { - LOG(WARNING) << "Failed to list directory: " << _cache_base_path - << ", error: " << ec.message(); - return Status::InternalError("Failed to list dir {}: {}", _cache_base_path, - ec.message()); - } - for (; key_it != std::filesystem::directory_iterator(); ++key_it) { - if (key_it->is_directory()) { - std::string cache_key = key_it->path().filename().native(); - if (cache_key.size() > KEY_PREFIX_LENGTH) { - std::string key_prefix = - Path(_cache_base_path) / cache_key.substr(0, KEY_PREFIX_LENGTH); - bool exists = false; - auto exists_status = fs->exists(key_prefix, &exists); - if (!exists_status.ok()) { - LOG(WARNING) << "Failed to check directory existence: " << key_prefix - << ", error: " << exists_status.to_string(); - return exists_status; - } - if (!exists) { - auto create_status = fs->create_directory(key_prefix); - if (!create_status.ok()) { - LOG(WARNING) << "Failed to create directory: " << key_prefix - << ", error: " << create_status.to_string(); - return create_status; + std::vector<std::filesystem::directory_iterator> file_list; Review Comment: why do we use iterator as the element type? it may open too many files, consider keep the file name (string) as the element type -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org