This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 31342672dc9 branch-3.0: [fix](cloud) speed up file cache initialization 
#48687 (#48798)
31342672dc9 is described below

commit 31342672dc9ddd079f25c426bb4939f9323fb748
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Mar 10 14:53:00 2025 +0800

    branch-3.0: [fix](cloud) speed up file cache initialization #48687 (#48798)
    
    Cherry-picked from #48687
    
    Signed-off-by: zhengyu <[email protected]>
    Co-authored-by: zhengyu <[email protected]>
---
 be/src/io/cache/fs_file_cache_storage.cpp | 93 ++++++++++++++++++++++---------
 1 file changed, 67 insertions(+), 26 deletions(-)

diff --git a/be/src/io/cache/fs_file_cache_storage.cpp 
b/be/src/io/cache/fs_file_cache_storage.cpp
index 43a4a541cb0..c6df21aceed 100644
--- a/be/src/io/cache/fs_file_cache_storage.cpp
+++ b/be/src/io/cache/fs_file_cache_storage.cpp
@@ -309,15 +309,36 @@ std::string 
FSFileCacheStorage::get_path_in_local_cache(const UInt128Wrapper& va
 }
 
 Status FSFileCacheStorage::upgrade_cache_dir_if_necessary() const {
-    /// version 1.0: cache_base_path / key / offset
-    /// version 2.0: cache_base_path / key_prefix / key / offset
+    /*
+     * If use version2 but was version 1, do upgrade:
+     *
+     * Action I:
+     *     version 1.0: cache_base_path / key / offset
+     *     version 2.0: cache_base_path / key_prefix / key / offset
+     *
+     * Action II:
+     *     add '_0' to hash dir
+     *
+     * Note: This is a sync operation with tons of IOs, so it may affect BE
+     * boot time heavily. Fortunately, Action I & II will only happen when
+     * upgrading (once in the cluster life time).
+     */
+
     std::string version;
+    std::error_code ec;
+    int rename_count = 0;
+    auto start_time = std::chrono::steady_clock::now();
+
     RETURN_IF_ERROR(read_file_cache_version(&version));
+    LOG(INFO) << "Checking cache version upgrade. Current version: " << version
+              << ", target version: 2.0, need upgrade: "
+              << (USE_CACHE_VERSION2 && version != "2.0");
     if (USE_CACHE_VERSION2 && version != "2.0") {
         // move directories format as version 2.0
-        std::error_code ec;
         std::filesystem::directory_iterator key_it {_cache_base_path, ec};
         if (ec) {
+            LOG(WARNING) << "Failed to list directory: " << _cache_base_path
+                         << ", error: " << ec.message();
             return Status::InternalError("Failed to list dir {}: {}", 
_cache_base_path,
                                          ec.message());
         }
@@ -328,31 +349,49 @@ Status 
FSFileCacheStorage::upgrade_cache_dir_if_necessary() const {
                     std::string key_prefix =
                             Path(_cache_base_path) / cache_key.substr(0, 
KEY_PREFIX_LENGTH);
                     bool exists = false;
-                    RETURN_IF_ERROR(fs->exists(key_prefix, &exists));
+                    auto exists_status = fs->exists(key_prefix, &exists);
+                    if (!exists_status.ok()) {
+                        LOG(WARNING) << "Failed to check directory existence: 
" << key_prefix
+                                     << ", error: " << 
exists_status.to_string();
+                        return exists_status;
+                    }
                     if (!exists) {
-                        RETURN_IF_ERROR(fs->create_directory(key_prefix));
+                        auto create_status = fs->create_directory(key_prefix);
+                        if (!create_status.ok()) {
+                            LOG(WARNING) << "Failed to create directory: " << 
key_prefix
+                                         << ", error: " << 
create_status.to_string();
+                            return create_status;
+                        }
+                    }
+                    auto rename_status = fs->rename(key_it->path(), key_prefix 
/ cache_key);
+                    if (rename_status.ok()) {
+                        ++rename_count;
+                    } else {
+                        LOG(WARNING)
+                                << "Failed to rename directory from " << 
key_it->path().native()
+                                << " to " << (key_prefix / cache_key).native()
+                                << ", error: " << rename_status.to_string();
+                        return rename_status;
                     }
-                    RETURN_IF_ERROR(fs->rename(key_it->path(), key_prefix / 
cache_key));
                 }
             }
         }
-        if (!write_file_cache_version().ok()) {
-            return Status::InternalError("Failed to write version hints for 
file cache");
-        }
-    }
 
-    auto rebuild_dir = [&](std::filesystem::directory_iterator& 
upgrade_key_it) -> Status {
-        for (; upgrade_key_it != std::filesystem::directory_iterator(); 
++upgrade_key_it) {
-            if (upgrade_key_it->path().filename().native().find('_') == 
std::string::npos) {
-                
RETURN_IF_ERROR(fs->delete_directory(upgrade_key_it->path().native() + "_0"));
-                RETURN_IF_ERROR(
-                        fs->rename(upgrade_key_it->path(), 
upgrade_key_it->path().native() + "_0"));
+        auto rebuild_dir = [&](std::filesystem::directory_iterator& 
upgrade_key_it) -> Status {
+            for (; upgrade_key_it != std::filesystem::directory_iterator(); 
++upgrade_key_it) {
+                if (upgrade_key_it->path().filename().native().find('_') == 
std::string::npos) {
+                    
RETURN_IF_ERROR(fs->delete_directory(upgrade_key_it->path().native() + "_0"));
+                    auto rename_status = fs->rename(upgrade_key_it->path(),
+                                                    
upgrade_key_it->path().native() + "_0");
+                    if (rename_status.ok()) {
+                        ++rename_count;
+                    }
+                    RETURN_IF_ERROR(rename_status);
+                }
             }
-        }
-        return Status::OK();
-    };
-    std::error_code ec;
-    if constexpr (USE_CACHE_VERSION2) {
+            return Status::OK();
+        };
+
         std::filesystem::directory_iterator key_prefix_it {_cache_base_path, 
ec};
         if (ec) [[unlikely]] {
             LOG(WARNING) << ec.message();
@@ -374,13 +413,15 @@ Status 
FSFileCacheStorage::upgrade_cache_dir_if_necessary() const {
             }
             RETURN_IF_ERROR(rebuild_dir(key_it));
         }
-    } else {
-        std::filesystem::directory_iterator key_it {_cache_base_path, ec};
-        if (ec) [[unlikely]] {
-            return Status::IOError(ec.message());
+        if (!write_file_cache_version().ok()) {
+            return Status::InternalError("Failed to write version hints for 
file cache");
         }
-        RETURN_IF_ERROR(rebuild_dir(key_it));
     }
+
+    auto end_time = std::chrono::steady_clock::now();
+    auto duration = 
std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
+    LOG(INFO) << "Cache directory upgrade completed. Total files renamed: " << 
rename_count
+              << ", Time taken: " << duration.count() << "ms";
     return Status::OK();
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to