This is an automated email from the ASF dual-hosted git repository. gavinchou pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 29ceef687fcd99312882c1506828236e743e6c44 Author: Gavin Chou <gavineaglec...@gmail.com> AuthorDate: Fri Aug 9 10:51:11 2024 +0800 [fix](file cache) Fix slow IO for table stats procedure, introduced by #37141 (#39123) The session variable `disable_file_cache` is handled as "disposable file cache" in beta_rowset_reader.cpp: ``` if (_read_context->runtime_state != nullptr) { _read_options.io_ctx.query_id = &_read_context->runtime_state->query_id(); _read_options.io_ctx.read_file_cache = _read_context->runtime_state->query_options().enable_file_cache; _read_options.io_ctx.is_disposable = _read_context->runtime_state->query_options().disable_file_cache; } ``` We use the disposable cache to avoid IO amplification and to avoid evicting large amounts of already-cached data (the "normal cache"). We cannot set the read-option cache policy to "no cache", because that may cause IO amplification: every page IO would trigger a remote IO, which is a performance disaster. --- be/src/olap/parallel_scanner_builder.cpp | 7 ++----- be/src/olap/rowset/beta_rowset.cpp | 18 +++++++----------- be/src/olap/rowset/beta_rowset.h | 9 +++------ be/src/olap/rowset/beta_rowset_reader.cpp | 10 +++------- be/src/olap/segment_loader.cpp | 4 ++-- be/src/olap/segment_loader.h | 3 +-- 6 files changed, 18 insertions(+), 33 deletions(-) diff --git a/be/src/olap/parallel_scanner_builder.cpp b/be/src/olap/parallel_scanner_builder.cpp index 6a2503a70e9..10bd61cd8d5 100644 --- a/be/src/olap/parallel_scanner_builder.cpp +++ b/be/src/olap/parallel_scanner_builder.cpp @@ -182,9 +182,6 @@ Status ParallelScannerBuilder::_load() { bool enable_segment_cache = _state->query_options().__isset.enable_segment_cache ? _state->query_options().enable_segment_cache : true; - bool disable_file_cache = _state->query_options().__isset.disable_file_cache - ? 
_state->query_options().disable_file_cache - : false; for (auto& rowset : rowsets) { RETURN_IF_ERROR(rowset->load()); const auto rowset_id = rowset->rowset_id(); @@ -192,7 +189,7 @@ Status ParallelScannerBuilder::_load() { RETURN_IF_ERROR(SegmentLoader::instance()->load_segments( std::dynamic_pointer_cast<BetaRowset>(rowset), &segment_cache_handle, - enable_segment_cache, false, disable_file_cache)); + enable_segment_cache, false)); _total_rows += rowset->num_rows(); } } @@ -211,4 +208,4 @@ std::shared_ptr<NewOlapScanner> ParallelScannerBuilder::_build_scanner( return NewOlapScanner::create_shared(_parent, std::move(params)); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 5114cc6595a..832ca314088 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -147,26 +147,23 @@ Status BetaRowset::get_segments_size(std::vector<size_t>* segments_size) { return Status::OK(); } -Status BetaRowset::load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments, - bool disable_file_cache) { - return load_segments(0, num_segments(), segments, disable_file_cache); +Status BetaRowset::load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments) { + return load_segments(0, num_segments(), segments); } Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end, - std::vector<segment_v2::SegmentSharedPtr>* segments, - bool disable_file_cache) { + std::vector<segment_v2::SegmentSharedPtr>* segments) { int64_t seg_id = seg_id_begin; while (seg_id < seg_id_end) { std::shared_ptr<segment_v2::Segment> segment; - RETURN_IF_ERROR(load_segment(seg_id, &segment, disable_file_cache)); + RETURN_IF_ERROR(load_segment(seg_id, &segment)); segments->push_back(std::move(segment)); seg_id++; } return Status::OK(); } -Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment, - bool 
disable_file_cache) { +Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment) { auto fs = _rowset_meta->fs(); if (!fs) { return Status::Error<INIT_FAILED>("get fs failed"); @@ -175,9 +172,8 @@ Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* se DCHECK(seg_id >= 0); auto seg_path = DORIS_TRY(segment_path(seg_id)); io::FileReaderOptions reader_options { - .cache_type = !disable_file_cache && config::enable_file_cache - ? io::FileCachePolicy::FILE_BLOCK_CACHE - : io::FileCachePolicy::NO_CACHE, + .cache_type = config::enable_file_cache ? io::FileCachePolicy::FILE_BLOCK_CACHE + : io::FileCachePolicy::NO_CACHE, .is_doris_table = true, .cache_base_path = "", .file_size = _rowset_meta->segment_file_size(seg_id), diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index 59ed6e061fe..52d5ac5c8a8 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -71,15 +71,12 @@ public: Status check_file_exist() override; - Status load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments, - bool disable_file_cache = false); + Status load_segments(std::vector<segment_v2::SegmentSharedPtr>* segments); Status load_segments(int64_t seg_id_begin, int64_t seg_id_end, - std::vector<segment_v2::SegmentSharedPtr>* segments, - bool disable_file_cache = false); + std::vector<segment_v2::SegmentSharedPtr>* segments); - Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment, - bool disable_file_cache = false); + Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment); Status get_segments_size(std::vector<size_t>* segments_size); diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 4d953d1dbe3..42456bb8625 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -249,13 +249,9 @@ Status 
BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context } // load segments - bool disable_file_cache = false; bool enable_segment_cache = true; auto* state = read_context->runtime_state; if (state != nullptr) { - disable_file_cache = state->query_options().__isset.disable_file_cache - ? state->query_options().disable_file_cache - : false; enable_segment_cache = state->query_options().__isset.enable_segment_cache ? state->query_options().enable_segment_cache : true; @@ -264,9 +260,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context bool should_use_cache = use_cache || (_read_context->reader_type == ReaderType::READER_QUERY && enable_segment_cache); SegmentCacheHandle segment_cache_handle; - RETURN_IF_ERROR(SegmentLoader::instance()->load_segments( - _rowset, &segment_cache_handle, should_use_cache, - /*need_load_pk_index_and_bf*/ false, disable_file_cache)); + RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(_rowset, &segment_cache_handle, + should_use_cache, + /*need_load_pk_index_and_bf*/ false)); // create iterator for each segment auto& segments = segment_cache_handle.get_segments(); diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp index 98db0351240..12ab89af0be 100644 --- a/be/src/olap/segment_loader.cpp +++ b/be/src/olap/segment_loader.cpp @@ -52,7 +52,7 @@ void SegmentCache::erase(const SegmentCache::CacheKey& key) { Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, bool use_cache, - bool need_load_pk_index_and_bf, bool disable_file_cache) { + bool need_load_pk_index_and_bf) { if (cache_handle->is_inited()) { return Status::OK(); } @@ -62,7 +62,7 @@ Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, continue; } segment_v2::SegmentSharedPtr segment; - RETURN_IF_ERROR(rowset->load_segment(i, &segment, disable_file_cache)); + RETURN_IF_ERROR(rowset->load_segment(i, &segment)); if 
(need_load_pk_index_and_bf) { RETURN_IF_ERROR(segment->load_pk_index_and_bf()); } diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h index fc2f0d8c03f..5bb8fae3c41 100644 --- a/be/src/olap/segment_loader.h +++ b/be/src/olap/segment_loader.h @@ -118,8 +118,7 @@ public: // Load segments of "rowset", return the "cache_handle" which contains segments. // If use_cache is true, it will be loaded from _cache. Status load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, - bool use_cache = false, bool need_load_pk_index_and_bf = false, - bool disable_file_cache = false); + bool use_cache = false, bool need_load_pk_index_and_bf = false); void erase_segment(const SegmentCache::CacheKey& key); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org