This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new b0a9aaf4943 [opt](file-meta-cache) reduce file meta cache size (#32340) (#32367) b0a9aaf4943 is described below commit b0a9aaf49438e2a0eae9b139f0a8a188ead75588 Author: Mingyu Chen <morning...@163.com> AuthorDate: Mon Mar 18 13:40:36 2024 +0800 [opt](file-meta-cache) reduce file meta cache size (#32340) (#32367) pick part of #32340 2. Reduce the default capacity of the file meta cache from 20000 to 1000. Also change the default capacity of the hdfs file handle cache from 20000 to 1000. 3. Change the judgement of whether to enable the file meta cache when querying. If the number of files that need to be read is larger than 1/3 of the file meta cache's capacity, the file meta cache will be disabled for this query, because the cache is useless if there are too many files. --- be/src/common/config.cpp | 5 +++-- be/src/vec/exec/scan/vfile_scanner.cpp | 5 ++--- be/src/vec/exec/scan/vfile_scanner.h | 9 +++++++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index f523649f649..27efa016fab 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1051,9 +1051,10 @@ DEFINE_Bool(enable_feature_binlog, "false"); // enable set in BitmapValue DEFINE_Bool(enable_set_in_bitmap_value, "false"); -DEFINE_Int64(max_hdfs_file_handle_cache_num, "20000"); +DEFINE_Int64(max_hdfs_file_handle_cache_num, "1000"); DEFINE_Int32(max_hdfs_file_handle_cache_time_sec, "3600"); -DEFINE_Int64(max_external_file_meta_cache_num, "20000"); +DEFINE_Int64(max_external_file_meta_cache_num, "1000"); + // Apply delete pred in cumu compaction DEFINE_mBool(enable_delete_when_cumu_compaction, "false"); diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 988dab0a502..dc4d643a80d 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -737,9 +737,8 @@ Status
VFileScanner::_get_next_reader() { std::unique_ptr<ParquetReader> parquet_reader = ParquetReader::create_unique( _profile, *_params, range, _state->query_options().batch_size, tz, _io_ctx.get(), _state, - config::max_external_file_meta_cache_num <= 0 - ? nullptr - : ExecEnv::GetInstance()->file_meta_cache(), + _shoudl_enable_file_meta_cache() ? ExecEnv::GetInstance()->file_meta_cache() + : nullptr, _state->query_options().enable_parquet_lazy_mat); { SCOPED_TIMER(_open_reader_timer); diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h index 58355cdbe36..5261337f000 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -216,5 +216,14 @@ private: _counter.num_rows_unselected = 0; _counter.num_rows_filtered = 0; } + + // enable the file meta cache only when + // 1. max_external_file_meta_cache_num is > 0 + // 2. the file number is less than 1/3 of cache's capacibility + // Otherwise, the cache miss rate will be high + bool _shoudl_enable_file_meta_cache() { + return config::max_external_file_meta_cache_num > 0 && + _ranges.size() < config::max_external_file_meta_cache_num / 3; + } }; } // namespace doris::vectorized --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org