github-actions[bot] commented on code in PR #40226:
URL: https://github.com/apache/doris/pull/40226#discussion_r1742455782


##########
be/src/io/cache/fs_file_cache_storage.cpp:
##########
@@ -338,6 +397,72 @@ std::string FSFileCacheStorage::get_version_path() const {
     return Path(_cache_base_path) / "version";
 }
 
+Status FSFileCacheStorage::parse_filename_suffix_to_cache_type(
+        const std::shared_ptr<LocalFileSystem>& fs, const Path& file_path, 
long expiration_time,
+        size_t size, size_t* offset, bool* is_tmp, FileCacheType* cache_type) 
const {
+    std::error_code ec;
+    std::string offset_with_suffix = file_path.native();
+    auto delim_pos1 = offset_with_suffix.find('_');
+    bool parsed = true;
+
+    try {
+        if (delim_pos1 == std::string::npos) {
+            // same as type "normal"
+            *offset = stoull(offset_with_suffix);
+        } else {
+            *offset = stoull(offset_with_suffix.substr(0, delim_pos1));
+            std::string suffix = offset_with_suffix.substr(delim_pos1 + 1);
+            // not need persistent anymore
+            // if suffix is equals to "tmp", it should be removed too.
+            if (suffix == "tmp") [[unlikely]] {
+                *is_tmp = true;
+            } else {
+                *cache_type = BlockFileCache::string_to_cache_type(suffix);
+            }
+        }
+    } catch (...) {
+        parsed = false;
+    }
+
+    // File in dir with expiration time > 0 should all be TTL type
+    // while expiration time == 0 should all be NORMAL type but
+    // in old days, bug happens, thus break such consistency, e.g.
+    // BEs shut down during cache type transition.
+    // Nowadays, we only use expiration time to decide the type,
+    // i.e. whenever expiration time > 0, it IS TTL, otherwise
+    // it is NORMAL or INDEX depending on its suffix.
+    // From now on, the ttl type encoding in file name is only for
+    // compatibility. It won't be build into the filename, and existing
+    // ones will be ignored.
+    if (expiration_time > 0) {
+        *cache_type = FileCacheType::TTL;
+    } else if (*cache_type == FileCacheType::TTL && expiration_time == 0) {
+        *cache_type = FileCacheType::NORMAL;
+    }
+
+    if (!parsed) {
+        LOG(WARNING) << "parse offset err, path=" << file_path.native();
+        return Status::InternalError("parse offset err, path={}", 
file_path.native());
+    }
+    TEST_SYNC_POINT_CALLBACK("BlockFileCache::REMOVE_FILE", 
&offset_with_suffix);
+
+    if (ec) {
+        LOG(WARNING) << "failed to file_size: file_name=" << offset_with_suffix
+                     << "error=" << ec.message();
+        return Status::InternalError("failed to file_size: file_name={}, 
error={}",
+                                     offset_with_suffix, ec.message());
+    }
+
+    if (size == 0 && !(*is_tmp)) {
+        auto st = fs->delete_file(file_path);
+        if (!st.ok()) {
+            LOG_WARNING("delete file {} error", file_path.native()).error(st);
+        }
+        return Status::InternalError("file size is 0, file_name={}", 
offset_with_suffix);
+    }
+    return Status::OK();
+}
+
 void FSFileCacheStorage::load_cache_info_into_memory(BlockFileCache* _mgr) 
const {

Review Comment:
   warning: function 'load_cache_info_into_memory' has cognitive complexity of 
53 (threshold 50) [readability-function-cognitive-complexity]
   ```cpp
   void FSFileCacheStorage::load_cache_info_into_memory(BlockFileCache* _mgr) 
const {
                            ^
   ```
   <details>
   <summary>Additional context</summary>
   
   **be/src/io/cache/fs_file_cache_storage.cpp:469:** nesting level increased 
to 1
   ```cpp
       auto add_cell_batch_func = [&]() {
                                  ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:471:** nesting level increased 
to 2
   ```cpp
           auto f = [&](const BatchLoadArgs& args) {
                    ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:473:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               if (_mgr->_files.contains(args.hash) && 
_mgr->_files[args.hash].contains(args.offset)) {
               ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:473:** +1
   ```cpp
               if (_mgr->_files.contains(args.hash) && 
_mgr->_files[args.hash].contains(args.offset)) {
                                                    ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:475:** +4, including nesting 
penalty of 3, nesting level increased to 4
   ```cpp
                   if (file_block->expiration_time() != 
args.ctx.expiration_time ||
                   ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:475:** +1
   ```cpp
                   if (file_block->expiration_time() != 
args.ctx.expiration_time ||
                                                                                
 ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:489:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               if (!args.is_tmp) {
               ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:496:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               if (ec) {
               ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:504:** nesting level increased 
to 1
   ```cpp
       auto scan_file_cache = [&](std::filesystem::directory_iterator& key_it) {
                              ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:506:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           for (; key_it != std::filesystem::directory_iterator(); ++key_it) {
           ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:515:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               if (ec) [[unlikely]] {
               ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:524:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               for (; offset_it != std::filesystem::directory_iterator(); 
++offset_it) {
               ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:529:** +4, including nesting 
penalty of 3, nesting level increased to 4
   ```cpp
                   if (!parse_filename_suffix_to_cache_type(fs, 
offset_it->path().filename().native(),
                   ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:546:** +4, including nesting 
penalty of 3, nesting level increased to 4
   ```cpp
                   if (batch_load_buffer.size() >= scan_length) {
                   ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:554:** +1, including nesting 
penalty of 0, nesting level increased to 1
   ```cpp
       if constexpr (USE_CACHE_VERSION2) {
       ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:557:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           if (ec) {
           ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:561:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           for (; key_prefix_it != std::filesystem::directory_iterator(); 
++key_prefix_it) {
           ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:562:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               if (!key_prefix_it->is_directory()) {
               ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:566:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               if (key_prefix_it->path().filename().native().size() != 
KEY_PREFIX_LENGTH) {
               ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:571:** +4, including nesting 
penalty of 3, nesting level increased to 4
   ```cpp
                   if (ec) {
                   ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:578:** +3, including nesting 
penalty of 2, nesting level increased to 3
   ```cpp
               if (ec) {
               ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:584:** +1, nesting level 
increased to 1
   ```cpp
       } else {
         ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:586:** +2, including nesting 
penalty of 1, nesting level increased to 2
   ```cpp
           if (ec) {
           ^
   ```
   **be/src/io/cache/fs_file_cache_storage.cpp:592:** +1, including nesting 
penalty of 0, nesting level increased to 1
   ```cpp
       if (!batch_load_buffer.empty()) {
       ^
   ```
   
   </details>
   



##########
be/test/io/cache/block_file_cache_test.cpp:
##########
@@ -3491,7 +3498,82 @@ TEST_F(BlockFileCacheTest, 
test_lazy_load_with_error_file_1) {
     }
 }
 
-TEST_F(BlockFileCacheTest, test_lazy_load_with_error_file_2) {
+TEST_F(BlockFileCacheTest, test_async_load_ttl_suffix) {
+    const int64_t expiration = 1987654321;
+    // old file path format, [hash]_[expiration]/[offset]_ttl
+    // new file path format, [hash]_[expiration]/[offset]
+    if (fs::exists(cache_base_path)) {
+        fs::remove_all(cache_base_path);
+    }
+    fs::create_directories(cache_base_path);
+    auto sp = SyncPoint::get_instance();

Review Comment:
   warning: 'auto sp' can be declared as 'auto *sp' [readability-qualified-auto]
   
   ```suggestion
       auto *sp = SyncPoint::get_instance();
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to