This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new b0a9aaf4943 [opt](file-meta-cache) reduce file meta cache size 
(#32340) (#32367)
b0a9aaf4943 is described below

commit b0a9aaf49438e2a0eae9b139f0a8a188ead75588
Author: Mingyu Chen <morning...@163.com>
AuthorDate: Mon Mar 18 13:40:36 2024 +0800

    [opt](file-meta-cache) reduce file meta cache size (#32340) (#32367)
    
    pick part of #32340
    
    2. Reduce the default capacity of file meta cache, from 20000 to 1000
    
        Also change the default capacity of hdfs file handle cache, from 
20000 to 1000
    
    3. Change the judgement of whether to enable file meta cache when querying
    
        If the number of files to be read is not less than 1/3 of the 
file meta cache's capacity, the file meta cache
        will be disabled for this query, because the cache is useless if there are 
too many files.
---
 be/src/common/config.cpp               | 5 +++--
 be/src/vec/exec/scan/vfile_scanner.cpp | 5 ++---
 be/src/vec/exec/scan/vfile_scanner.h   | 9 +++++++++
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index f523649f649..27efa016fab 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1051,9 +1051,10 @@ DEFINE_Bool(enable_feature_binlog, "false");
 // enable set in BitmapValue
 DEFINE_Bool(enable_set_in_bitmap_value, "false");
 
-DEFINE_Int64(max_hdfs_file_handle_cache_num, "20000");
+DEFINE_Int64(max_hdfs_file_handle_cache_num, "1000");
 DEFINE_Int32(max_hdfs_file_handle_cache_time_sec, "3600");
-DEFINE_Int64(max_external_file_meta_cache_num, "20000");
+DEFINE_Int64(max_external_file_meta_cache_num, "1000");
+
 // Apply delete pred in cumu compaction
 DEFINE_mBool(enable_delete_when_cumu_compaction, "false");
 
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp 
b/be/src/vec/exec/scan/vfile_scanner.cpp
index 988dab0a502..dc4d643a80d 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -737,9 +737,8 @@ Status VFileScanner::_get_next_reader() {
             std::unique_ptr<ParquetReader> parquet_reader = 
ParquetReader::create_unique(
                     _profile, *_params, range, 
_state->query_options().batch_size, tz,
                     _io_ctx.get(), _state,
-                    config::max_external_file_meta_cache_num <= 0
-                            ? nullptr
-                            : ExecEnv::GetInstance()->file_meta_cache(),
+                    _shoudl_enable_file_meta_cache() ? 
ExecEnv::GetInstance()->file_meta_cache()
+                                                     : nullptr,
                     _state->query_options().enable_parquet_lazy_mat);
             {
                 SCOPED_TIMER(_open_reader_timer);
diff --git a/be/src/vec/exec/scan/vfile_scanner.h 
b/be/src/vec/exec/scan/vfile_scanner.h
index 58355cdbe36..5261337f000 100644
--- a/be/src/vec/exec/scan/vfile_scanner.h
+++ b/be/src/vec/exec/scan/vfile_scanner.h
@@ -216,5 +216,14 @@ private:
         _counter.num_rows_unselected = 0;
         _counter.num_rows_filtered = 0;
     }
+
+    // Enable the file meta cache only when both conditions hold:
+    // 1. max_external_file_meta_cache_num is > 0, and
+    // 2. the file number (_ranges.size()) is less than 1/3 of the cache's capacity.
+    // Otherwise, the cache miss rate will be high.
+    bool _shoudl_enable_file_meta_cache() {
+        return config::max_external_file_meta_cache_num > 0 &&
+               _ranges.size() < config::max_external_file_meta_cache_num / 3;
+    }
 };
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to