This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 71645a391c [debug](FileCache) fail over to remote file reader if local 
cache failed (#24097)
71645a391c is described below

commit 71645a391c2cf45d955cd66568117f3d46ea4984
Author: Ashin Gau <ashin...@users.noreply.github.com>
AuthorDate: Sun Sep 10 12:26:17 2023 +0800

    [debug](FileCache) fail over to remote file reader if local cache failed 
(#24097)
    
    Fail over to the remote file reader when reading from the local file
    cache fails. This makes the file cache more robust.
---
 be/src/common/config.cpp                           |  1 +
 be/src/common/config.h                             |  2 +
 .../io/cache/block/cached_remote_file_reader.cpp   | 51 ++++++++++++++++------
 be/src/io/cache/block/cached_remote_file_reader.h  |  3 ++
 4 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index eb03e00450..418fc64ff2 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -974,6 +974,7 @@ DEFINE_Validator(file_cache_min_file_segment_size, [](const 
int64_t config) -> b
 });
 DEFINE_Bool(clear_file_cache, "false");
 DEFINE_Bool(enable_file_cache_query_limit, "false");
+DEFINE_mInt32(file_cache_wait_sec_after_fail, "0"); // // zero for no waiting 
and retrying
 
 DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "1800");
 DEFINE_mInt32(inverted_index_cache_stale_sweep_time_sec, "600");
diff --git a/be/src/common/config.h b/be/src/common/config.h
index c37c58ea98..83b7dbd95f 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1013,6 +1013,8 @@ DECLARE_Int64(file_cache_min_file_segment_size);
 DECLARE_Int64(file_cache_max_file_segment_size);
 DECLARE_Bool(clear_file_cache);
 DECLARE_Bool(enable_file_cache_query_limit);
+// only for debug, will be removed after finding out the root cause
+DECLARE_mInt32(file_cache_wait_sec_after_fail); // zero for no waiting and 
retrying
 
 // inverted index searcher cache
 // cache entry stay time after lookup
diff --git a/be/src/io/cache/block/cached_remote_file_reader.cpp 
b/be/src/io/cache/block/cached_remote_file_reader.cpp
index e2a629fd4c..59e8c40640 100644
--- a/be/src/io/cache/block/cached_remote_file_reader.cpp
+++ b/be/src/io/cache/block/cached_remote_file_reader.cpp
@@ -89,21 +89,9 @@ std::pair<size_t, size_t> 
CachedRemoteFileReader::_align_size(size_t offset,
     return std::make_pair(align_left, align_size);
 }
 
-Status CachedRemoteFileReader::read_at_impl(size_t offset, Slice result, 
size_t* bytes_read,
-                                            const IOContext* io_ctx) {
-    DCHECK(!closed());
-    DCHECK(io_ctx);
-    if (offset > size()) {
-        return Status::IOError(
-                fmt::format("offset exceeds file size(offset: {), file size: 
{}, path: {})", offset,
-                            size(), path().native()));
-    }
+Status CachedRemoteFileReader::_read_from_cache(size_t offset, Slice result, 
size_t* bytes_read,
+                                                const IOContext* io_ctx) {
     size_t bytes_req = result.size;
-    bytes_req = std::min(bytes_req, size() - offset);
-    if (UNLIKELY(bytes_req == 0)) {
-        *bytes_read = 0;
-        return Status::OK();
-    }
     ReadStatistics stats;
     auto [align_left, align_size] = _align_size(offset, bytes_req);
     CacheContext cache_context(io_ctx);
@@ -224,6 +212,41 @@ Status CachedRemoteFileReader::read_at_impl(size_t offset, 
Slice result, size_t*
     return Status::OK();
 }
 
+Status CachedRemoteFileReader::read_at_impl(size_t offset, Slice result, 
size_t* bytes_read,
+                                            const IOContext* io_ctx) {
+    DCHECK(!closed());
+    DCHECK(io_ctx);
+    if (offset > size()) {
+        return Status::IOError(
+                fmt::format("offset exceeds file size(offset: {), file size: 
{}, path: {})", offset,
+                            size(), path().native()));
+    }
+    size_t bytes_req = result.size;
+    bytes_req = std::min(bytes_req, size() - offset);
+    if (UNLIKELY(bytes_req == 0)) {
+        *bytes_read = 0;
+        return Status::OK();
+    }
+    Status cache_st = _read_from_cache(offset, result, bytes_read, io_ctx);
+    if (UNLIKELY(!cache_st.ok())) {
+        if (config::file_cache_wait_sec_after_fail > 0) {
+            // only for debug, wait and retry to load data from file cache
+            // return error if failed again
+            LOG(WARNING) << "Failed to read data from file cache, and wait "
+                         << config::file_cache_wait_sec_after_fail
+                         << " seconds to reload data: " << 
cache_st.to_string();
+            sleep(config::file_cache_wait_sec_after_fail);
+            cache_st = _read_from_cache(offset, result, bytes_read, io_ctx);
+        } else {
+            // fail over to remote file reader, and return the status of 
remote read
+            LOG(WARNING) << "Failed to read data from file cache, and fail 
over to remote file: "
+                         << cache_st.to_string();
+            return _remote_file_reader->read_at(offset, result, bytes_read, 
io_ctx);
+        }
+    }
+    return cache_st;
+}
+
 void CachedRemoteFileReader::_update_state(const ReadStatistics& read_stats,
                                            FileCacheStatistics* statis) const {
     if (statis == nullptr) {
diff --git a/be/src/io/cache/block/cached_remote_file_reader.h 
b/be/src/io/cache/block/cached_remote_file_reader.h
index 51e9e562a2..7a8e0e82ec 100644
--- a/be/src/io/cache/block/cached_remote_file_reader.h
+++ b/be/src/io/cache/block/cached_remote_file_reader.h
@@ -77,6 +77,9 @@ private:
         int64_t local_write_timer = 0;
     };
     void _update_state(const ReadStatistics& stats, FileCacheStatistics* 
state) const;
+
+    Status _read_from_cache(size_t offset, Slice result, size_t* bytes_read,
+                            const IOContext* io_ctx);
 };
 
 } // namespace io


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to