This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 29ceef687fcd99312882c1506828236e743e6c44
Author: Gavin Chou <gavineaglec...@gmail.com>
AuthorDate: Fri Aug 9 10:51:11 2024 +0800

    [fix](file cache) Fix slow IO for table stats procedure, introduced by #37141 (#39123)
    
    The session variable `disable_file_cache` is interpreted as "use the
    disposable file cache" in beta_rowset_reader.cpp, where it is mapped to
    `io_ctx.is_disposable`:
    
    ```
    if (_read_context->runtime_state != nullptr) {
        _read_options.io_ctx.query_id = &_read_context->runtime_state->query_id();
        _read_options.io_ctx.read_file_cache =
                _read_context->runtime_state->query_options().enable_file_cache;
        _read_options.io_ctx.is_disposable =
                _read_context->runtime_state->query_options().disable_file_cache;
    }
    ```
    
    We use the disposable cache to avoid IO amplification and to avoid
    evicting a large amount of the cached data (the "normal cache").
    
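    We cannot set the reader option's cache policy to "no cache" because it
    may cause IO amplification: every page IO would trigger a remote IO,
    which is a performance disaster. This commit therefore removes the
    `disable_file_cache` parameter from the segment-loading path, so the
    reader's cache policy depends only on `config::enable_file_cache`.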
---
 be/src/olap/parallel_scanner_builder.cpp  |  7 ++-----
 be/src/olap/rowset/beta_rowset.cpp        | 18 +++++++-----------
 be/src/olap/rowset/beta_rowset.h          |  9 +++------
 be/src/olap/rowset/beta_rowset_reader.cpp | 10 +++-------
 be/src/olap/segment_loader.cpp            |  4 ++--
 be/src/olap/segment_loader.h              |  3 +--
 6 files changed, 18 insertions(+), 33 deletions(-)

diff --git a/be/src/olap/parallel_scanner_builder.cpp 
b/be/src/olap/parallel_scanner_builder.cpp
index 6a2503a70e9..10bd61cd8d5 100644
--- a/be/src/olap/parallel_scanner_builder.cpp
+++ b/be/src/olap/parallel_scanner_builder.cpp
@@ -182,9 +182,6 @@ Status ParallelScannerBuilder::_load() {
         bool enable_segment_cache = 
_state->query_options().__isset.enable_segment_cache
                                             ? 
_state->query_options().enable_segment_cache
                                             : true;
-        bool disable_file_cache = 
_state->query_options().__isset.disable_file_cache
-                                          ? 
_state->query_options().disable_file_cache
-                                          : false;
         for (auto& rowset : rowsets) {
             RETURN_IF_ERROR(rowset->load());
             const auto rowset_id = rowset->rowset_id();
@@ -192,7 +189,7 @@ Status ParallelScannerBuilder::_load() {
 
             RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
                     std::dynamic_pointer_cast<BetaRowset>(rowset), 
&segment_cache_handle,
-                    enable_segment_cache, false, disable_file_cache));
+                    enable_segment_cache, false));
             _total_rows += rowset->num_rows();
         }
     }
@@ -211,4 +208,4 @@ std::shared_ptr<NewOlapScanner> 
ParallelScannerBuilder::_build_scanner(
     return NewOlapScanner::create_shared(_parent, std::move(params));
 }
 
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/olap/rowset/beta_rowset.cpp 
b/be/src/olap/rowset/beta_rowset.cpp
index 5114cc6595a..832ca314088 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -147,26 +147,23 @@ Status BetaRowset::get_segments_size(std::vector<size_t>* 
segments_size) {
     return Status::OK();
 }
 
-Status BetaRowset::load_segments(std::vector<segment_v2::SegmentSharedPtr>* 
segments,
-                                 bool disable_file_cache) {
-    return load_segments(0, num_segments(), segments, disable_file_cache);
+Status BetaRowset::load_segments(std::vector<segment_v2::SegmentSharedPtr>* 
segments) {
+    return load_segments(0, num_segments(), segments);
 }
 
 Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end,
-                                 std::vector<segment_v2::SegmentSharedPtr>* 
segments,
-                                 bool disable_file_cache) {
+                                 std::vector<segment_v2::SegmentSharedPtr>* 
segments) {
     int64_t seg_id = seg_id_begin;
     while (seg_id < seg_id_end) {
         std::shared_ptr<segment_v2::Segment> segment;
-        RETURN_IF_ERROR(load_segment(seg_id, &segment, disable_file_cache));
+        RETURN_IF_ERROR(load_segment(seg_id, &segment));
         segments->push_back(std::move(segment));
         seg_id++;
     }
     return Status::OK();
 }
 
-Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* 
segment,
-                                bool disable_file_cache) {
+Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* 
segment) {
     auto fs = _rowset_meta->fs();
     if (!fs) {
         return Status::Error<INIT_FAILED>("get fs failed");
@@ -175,9 +172,8 @@ Status BetaRowset::load_segment(int64_t seg_id, 
segment_v2::SegmentSharedPtr* se
     DCHECK(seg_id >= 0);
     auto seg_path = DORIS_TRY(segment_path(seg_id));
     io::FileReaderOptions reader_options {
-            .cache_type = !disable_file_cache && config::enable_file_cache
-                                  ? io::FileCachePolicy::FILE_BLOCK_CACHE
-                                  : io::FileCachePolicy::NO_CACHE,
+            .cache_type = config::enable_file_cache ? 
io::FileCachePolicy::FILE_BLOCK_CACHE
+                                                    : 
io::FileCachePolicy::NO_CACHE,
             .is_doris_table = true,
             .cache_base_path = "",
             .file_size = _rowset_meta->segment_file_size(seg_id),
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index 59ed6e061fe..52d5ac5c8a8 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -71,15 +71,12 @@ public:
 
     Status check_file_exist() override;
 
-    Status load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments,
-                         bool disable_file_cache = false);
+    Status load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments);
 
     Status load_segments(int64_t seg_id_begin, int64_t seg_id_end,
-                         std::vector<segment_v2::SegmentSharedPtr>* segments,
-                         bool disable_file_cache = false);
+                         std::vector<segment_v2::SegmentSharedPtr>* segments);
 
-    Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment,
-                        bool disable_file_cache = false);
+    Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment);
 
     Status get_segments_size(std::vector<size_t>* segments_size);
 
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp 
b/be/src/olap/rowset/beta_rowset_reader.cpp
index 4d953d1dbe3..42456bb8625 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -249,13 +249,9 @@ Status 
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
     }
 
     // load segments
-    bool disable_file_cache = false;
     bool enable_segment_cache = true;
     auto* state = read_context->runtime_state;
     if (state != nullptr) {
-        disable_file_cache = state->query_options().__isset.disable_file_cache
-                                     ? 
state->query_options().disable_file_cache
-                                     : false;
         enable_segment_cache = 
state->query_options().__isset.enable_segment_cache
                                        ? 
state->query_options().enable_segment_cache
                                        : true;
@@ -264,9 +260,9 @@ Status 
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
     bool should_use_cache = use_cache || (_read_context->reader_type == 
ReaderType::READER_QUERY &&
                                           enable_segment_cache);
     SegmentCacheHandle segment_cache_handle;
-    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(
-            _rowset, &segment_cache_handle, should_use_cache,
-            /*need_load_pk_index_and_bf*/ false, disable_file_cache));
+    RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(_rowset, 
&segment_cache_handle,
+                                                             should_use_cache,
+                                                             
/*need_load_pk_index_and_bf*/ false));
 
     // create iterator for each segment
     auto& segments = segment_cache_handle.get_segments();
diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp
index 98db0351240..12ab89af0be 100644
--- a/be/src/olap/segment_loader.cpp
+++ b/be/src/olap/segment_loader.cpp
@@ -52,7 +52,7 @@ void SegmentCache::erase(const SegmentCache::CacheKey& key) {
 
 Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset,
                                     SegmentCacheHandle* cache_handle, bool 
use_cache,
-                                    bool need_load_pk_index_and_bf, bool 
disable_file_cache) {
+                                    bool need_load_pk_index_and_bf) {
     if (cache_handle->is_inited()) {
         return Status::OK();
     }
@@ -62,7 +62,7 @@ Status SegmentLoader::load_segments(const 
BetaRowsetSharedPtr& rowset,
             continue;
         }
         segment_v2::SegmentSharedPtr segment;
-        RETURN_IF_ERROR(rowset->load_segment(i, &segment, disable_file_cache));
+        RETURN_IF_ERROR(rowset->load_segment(i, &segment));
         if (need_load_pk_index_and_bf) {
             RETURN_IF_ERROR(segment->load_pk_index_and_bf());
         }
diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h
index fc2f0d8c03f..5bb8fae3c41 100644
--- a/be/src/olap/segment_loader.h
+++ b/be/src/olap/segment_loader.h
@@ -118,8 +118,7 @@ public:
     // Load segments of "rowset", return the "cache_handle" which contains 
segments.
     // If use_cache is true, it will be loaded from _cache.
     Status load_segments(const BetaRowsetSharedPtr& rowset, 
SegmentCacheHandle* cache_handle,
-                         bool use_cache = false, bool 
need_load_pk_index_and_bf = false,
-                         bool disable_file_cache = false);
+                         bool use_cache = false, bool 
need_load_pk_index_and_bf = false);
 
     void erase_segment(const SegmentCache::CacheKey& key);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to