gavinchou commented on code in PR #50726:
URL: https://github.com/apache/doris/pull/50726#discussion_r2096010192


##########
be/src/io/cache/fs_file_cache_storage.cpp:
##########
@@ -326,91 +442,139 @@ Status 
FSFileCacheStorage::upgrade_cache_dir_if_necessary() const {
     std::string version;
     std::error_code ec;
     int rename_count = 0;
+    int failure_count = 0;
     auto start_time = std::chrono::steady_clock::now();
 
     RETURN_IF_ERROR(read_file_cache_version(&version));
+
     LOG(INFO) << "Checking cache version upgrade. Current version: " << version
               << ", target version: 2.0, need upgrade: "
               << (USE_CACHE_VERSION2 && version != "2.0");
     if (USE_CACHE_VERSION2 && version != "2.0") {
         // move directories format as version 2.0
-        std::filesystem::directory_iterator key_it {_cache_base_path, ec};
-        if (ec) {
-            LOG(WARNING) << "Failed to list directory: " << _cache_base_path
-                         << ", error: " << ec.message();
-            return Status::InternalError("Failed to list dir {}: {}", 
_cache_base_path,
-                                         ec.message());
-        }
-        for (; key_it != std::filesystem::directory_iterator(); ++key_it) {
-            if (key_it->is_directory()) {
-                std::string cache_key = key_it->path().filename().native();
-                if (cache_key.size() > KEY_PREFIX_LENGTH) {
-                    std::string key_prefix =
-                            Path(_cache_base_path) / cache_key.substr(0, 
KEY_PREFIX_LENGTH);
-                    bool exists = false;
-                    auto exists_status = fs->exists(key_prefix, &exists);
-                    if (!exists_status.ok()) {
-                        LOG(WARNING) << "Failed to check directory existence: 
" << key_prefix
-                                     << ", error: " << 
exists_status.to_string();
-                        return exists_status;
-                    }
-                    if (!exists) {
-                        auto create_status = fs->create_directory(key_prefix);
-                        if (!create_status.ok()) {
-                            LOG(WARNING) << "Failed to create directory: " << 
key_prefix
-                                         << ", error: " << 
create_status.to_string();
-                            return create_status;
+        std::vector<std::filesystem::directory_iterator> file_list;
+        RETURN_IF_ERROR(collect_directory_entries(_cache_base_path, 
file_list));
+
+        // this directory_iterator should be a problem in concurrent access
+        for (const auto& key_it : file_list) {
+            try {
+                if (key_it->is_directory()) {
+                    std::string cache_key = key_it->path().filename().native();
+                    if (cache_key.size() > KEY_PREFIX_LENGTH) {
+                        std::string key_prefix =
+                                Path(_cache_base_path) / cache_key.substr(0, 
KEY_PREFIX_LENGTH);
+                        bool exists = false;
+                        auto exists_status = fs->exists(key_prefix, &exists);
+                        if (!exists_status.ok()) {
+                            LOG(WARNING) << "Failed to check directory 
existence: " << key_prefix
+                                         << ", error: " << 
exists_status.to_string();
+                            ++failure_count;
+                            continue;
+                        }
+                        if (!exists) {
+                            auto create_status = 
fs->create_directory(key_prefix);
+                            if (!create_status.ok() &&
+                                create_status.code() != 
TStatusCode::type::ALREADY_EXIST) {
+                                LOG(WARNING) << "Failed to create directory: " 
<< key_prefix
+                                             << ", error: " << 
create_status.to_string();
+                                ++failure_count;
+                                continue;
+                            }
+                        }
+                        auto rename_status = fs->rename(key_it->path(), 
key_prefix / cache_key);
+                        if (rename_status.ok() ||
+                            rename_status.code() == 
TStatusCode::type::DIRECTORY_NOT_EMPTY) {
+                            ++rename_count;
+                        } else {
+                            LOG(WARNING)
+                                    << "Failed to rename directory from " << 
key_it->path().native()
+                                    << " to " << (key_prefix / 
cache_key).native()
+                                    << ", error: " << 
rename_status.to_string();
+                            ++failure_count;
+                            continue;
                         }
-                    }
-                    auto rename_status = fs->rename(key_it->path(), key_prefix 
/ cache_key);
-                    if (rename_status.ok()) {
-                        ++rename_count;
-                    } else {
-                        LOG(WARNING)
-                                << "Failed to rename directory from " << 
key_it->path().native()
-                                << " to " << (key_prefix / cache_key).native()
-                                << ", error: " << rename_status.to_string();
-                        return rename_status;
                     }
                 }
+            } catch (std::filesystem::filesystem_error& e) {
+                LOG(WARNING) << "Error occurred while upgrading file cache 
directory: "
+                             << key_it->path() << " err: " << e.what();
+                ++failure_count;
             }
         }
 
         auto rebuild_dir = [&](std::filesystem::directory_iterator& 
upgrade_key_it) -> Status {

Review Comment:
   this procedure is unncecssary
   we can finish renaming and appending _0 simutaneously.



##########
be/src/io/cache/fs_file_cache_storage.cpp:
##########
@@ -326,91 +442,139 @@ Status 
FSFileCacheStorage::upgrade_cache_dir_if_necessary() const {
     std::string version;
     std::error_code ec;
     int rename_count = 0;
+    int failure_count = 0;
     auto start_time = std::chrono::steady_clock::now();
 
     RETURN_IF_ERROR(read_file_cache_version(&version));
+
     LOG(INFO) << "Checking cache version upgrade. Current version: " << version
               << ", target version: 2.0, need upgrade: "
               << (USE_CACHE_VERSION2 && version != "2.0");
     if (USE_CACHE_VERSION2 && version != "2.0") {
         // move directories format as version 2.0
-        std::filesystem::directory_iterator key_it {_cache_base_path, ec};
-        if (ec) {
-            LOG(WARNING) << "Failed to list directory: " << _cache_base_path
-                         << ", error: " << ec.message();
-            return Status::InternalError("Failed to list dir {}: {}", 
_cache_base_path,
-                                         ec.message());
-        }
-        for (; key_it != std::filesystem::directory_iterator(); ++key_it) {
-            if (key_it->is_directory()) {
-                std::string cache_key = key_it->path().filename().native();
-                if (cache_key.size() > KEY_PREFIX_LENGTH) {
-                    std::string key_prefix =
-                            Path(_cache_base_path) / cache_key.substr(0, 
KEY_PREFIX_LENGTH);
-                    bool exists = false;
-                    auto exists_status = fs->exists(key_prefix, &exists);
-                    if (!exists_status.ok()) {
-                        LOG(WARNING) << "Failed to check directory existence: 
" << key_prefix
-                                     << ", error: " << 
exists_status.to_string();
-                        return exists_status;
-                    }
-                    if (!exists) {
-                        auto create_status = fs->create_directory(key_prefix);
-                        if (!create_status.ok()) {
-                            LOG(WARNING) << "Failed to create directory: " << 
key_prefix
-                                         << ", error: " << 
create_status.to_string();
-                            return create_status;
+        std::vector<std::filesystem::directory_iterator> file_list;

Review Comment:
   why do we use iterator as the element type? it may open too many files,
   consider keep the file name (string) as the element type



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to