(doris) 21/21: [improvement](segmentcache) limit segment cache by memory or segment num (#37026)

dataroaring Sat, 29 Jun 2024 22:04:40 -0700

This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


commit 3df52cbf7610dc662ec525281b3a04eba360d219
Author: Yongqiang YANG <98214048+dataroar...@users.noreply.github.com>
AuthorDate: Sun Jun 30 12:57:14 2024 +0800

    [improvement](segmentcache) limit segment cache by memory or segment num (#37026)
    
    Also enlarge columns per segment.
---
 be/src/common/config.cpp           |  2 +-
 be/src/olap/lru_cache.cpp          |  5 +++++
 be/src/olap/lru_cache.h            |  4 +++-
 be/src/olap/segment_loader.h       | 13 ++++++++-----
 be/src/runtime/exec_env_init.cpp   |  8 ++++----
 be/test/testutil/run_all_tests.cpp |  2 +-
 6 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 580793d36ab..9df75b97bd6 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1068,7 +1068,7 @@ DEFINE_mInt32(schema_cache_sweep_time_sec, "100");
 
 // max number of segment cache, default -1 for backward compatibility fd_number*2/5
 DEFINE_mInt32(segment_cache_capacity, "-1");
-DEFINE_mInt32(estimated_num_columns_per_segment, "30");
+DEFINE_mInt32(estimated_num_columns_per_segment, "200");
 DEFINE_mInt32(estimated_mem_per_column_reader, "1024");
 // The value is calculate by storage_page_cache_limit * index_page_cache_percentage
 DEFINE_mInt32(segment_cache_memory_percentage, "2");
diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp
index 031082f6da8..741c2423915 100644
--- a/be/src/olap/lru_cache.cpp
+++ b/be/src/olap/lru_cache.cpp
@@ -22,6 +22,7 @@ namespace doris {
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_capacity, MetricUnit::BYTES);
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage, MetricUnit::BYTES);
+DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_element_count, MetricUnit::NOUNIT);
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage_ratio, MetricUnit::NOUNIT);
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_lookup_count, MetricUnit::OPERATIONS);
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_hit_count, MetricUnit::OPERATIONS);
@@ -542,6 +543,7 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity,
     _entity->register_hook(name, std::bind(&ShardedLRUCache::update_cache_metrics, this));
     INT_GAUGE_METRIC_REGISTER(_entity, cache_capacity);
     INT_GAUGE_METRIC_REGISTER(_entity, cache_usage);
+    INT_GAUGE_METRIC_REGISTER(_entity, cache_element_count);
     INT_DOUBLE_METRIC_REGISTER(_entity, cache_usage_ratio);
     INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_lookup_count);
     INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_hit_count);
@@ -640,15 +642,18 @@ void ShardedLRUCache::update_cache_metrics() const {
     size_t total_usage = 0;
     size_t total_lookup_count = 0;
     size_t total_hit_count = 0;
+    size_t total_element_count = 0;
     for (int i = 0; i < _num_shards; i++) {
         total_capacity += _shards[i]->get_capacity();
         total_usage += _shards[i]->get_usage();
         total_lookup_count += _shards[i]->get_lookup_count();
         total_hit_count += _shards[i]->get_hit_count();
+        total_element_count += _shards[i]->get_element_count();
     }
 
     cache_capacity->set_value(total_capacity);
     cache_usage->set_value(total_usage);
+    cache_element_count->set_value(total_element_count);
     cache_lookup_count->set_value(total_lookup_count);
     cache_hit_count->set_value(total_hit_count);
     cache_usage_ratio->set_value(total_capacity == 0 ? 0 : ((double)total_usage / total_capacity));
diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h
index 50676921044..059020deab5 100644
--- a/be/src/olap/lru_cache.h
+++ b/be/src/olap/lru_cache.h
@@ -60,7 +60,7 @@ enum LRUCacheType {
 };
 
 static constexpr LRUCacheType DEFAULT_LRU_CACHE_TYPE = LRUCacheType::SIZE;
-static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 16;
+static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 32;
 static constexpr size_t DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY = 0;
 
 class CacheKey {
@@ -349,6 +349,7 @@ public:
     uint64_t get_hit_count() const { return _hit_count; }
     size_t get_usage() const { return _usage; }
     size_t get_capacity() const { return _capacity; }
+    size_t get_element_count() const { return _table.element_count(); }
 
 private:
     void _lru_remove(LRUHandle* e);
@@ -433,6 +434,7 @@ private:
     std::shared_ptr<MetricEntity> _entity;
     IntGauge* cache_capacity = nullptr;
     IntGauge* cache_usage = nullptr;
+    IntGauge* cache_element_count = nullptr;
     DoubleGauge* cache_usage_ratio = nullptr;
     IntAtomicCounter* cache_lookup_count = nullptr;
     IntAtomicCounter* cache_hit_count = nullptr;
diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h
index 4d1f3f7a910..5bb8fae3c41 100644
--- a/be/src/olap/segment_loader.h
+++ b/be/src/olap/segment_loader.h
@@ -80,10 +80,11 @@ public:
         segment_v2::SegmentSharedPtr segment;
     };
 
-    SegmentCache(size_t capacity)
-            : LRUCachePolicyTrackingManual(CachePolicy::CacheType::SEGMENT_CACHE, capacity,
-                                           LRUCacheType::SIZE,
-                                           config::tablet_rowset_stale_sweep_time_sec) {}
+    SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit)
+            : LRUCachePolicyTrackingManual(CachePolicy::CacheType::SEGMENT_CACHE,
+                                           memory_bytes_limit, LRUCacheType::SIZE,
+                                           config::tablet_rowset_stale_sweep_time_sec,
+                                           DEFAULT_LRU_CACHE_NUM_SHARDS * 2, segment_num_limit) {}
 
     // Lookup the given segment in the cache.
     // If the segment is found, the cache entry will be written into handle.
@@ -110,7 +111,9 @@ public:
     // After the estimation of segment memory usage is provided later, it is recommended
     // to use Memory as the capacity limit of the cache.
 
-    SegmentLoader(size_t capacity) { _segment_cache = std::make_unique<SegmentCache>(capacity); }
+    SegmentLoader(size_t memory_limit_bytes, size_t segment_num_count) {
+        _segment_cache = std::make_unique<SegmentCache>(memory_limit_bytes, segment_num_count);
+    }
 
     // Load segments of "rowset", return the "cache_handle" which contains segments.
     // If use_cache is true, it will be loaded from _cache.
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index 6c8ac1e4aa9..bbf012b3a63 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -515,8 +515,8 @@ Status ExecEnv::_init_mem_env() {
     // SegmentLoader caches segments in rowset granularity. So the size of
     // opened files will greater than segment_cache_capacity.
     int64_t segment_cache_capacity = config::segment_cache_capacity;
-    if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 2 / 5) {
-        segment_cache_capacity = fd_number * 2 / 5;
+    if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 1 / 5) {
+        segment_cache_capacity = fd_number * 1 / 5;
     }
 
     int64_t segment_cache_mem_limit =
@@ -526,8 +526,8 @@ Status ExecEnv::_init_mem_env() {
             min(segment_cache_mem_limit, segment_cache_capacity *
                                                  config::estimated_num_columns_per_segment *
                                                  config::estimated_mem_per_column_reader);
-    _segment_loader = new SegmentLoader(min_segment_cache_mem_limit);
-    LOG(INFO) << "segment_cache_capacity <= fd_number * 2 / 5, fd_number: " << fd_number
+    _segment_loader = new SegmentLoader(min_segment_cache_mem_limit, segment_cache_capacity);
+    LOG(INFO) << "segment_cache_capacity <= fd_number * 1 / 5, fd_number: " << fd_number
               << " segment_cache_capacity: " << segment_cache_capacity
               << " min_segment_cache_mem_limit " << min_segment_cache_mem_limit;
 
diff --git a/be/test/testutil/run_all_tests.cpp b/be/test/testutil/run_all_tests.cpp
index 68b7352419c..1cf749d0f7f 100644
--- a/be/test/testutil/run_all_tests.cpp
+++ b/be/test/testutil/run_all_tests.cpp
@@ -53,7 +53,7 @@ int main(int argc, char** argv) {
     
doris::ExecEnv::GetInstance()->set_dummy_lru_cache(std::make_shared<doris::DummyLRUCache>());
     doris::ExecEnv::GetInstance()->set_storage_page_cache(
             doris::StoragePageCache::create_global_cache(1 << 30, 10, 0));
-    doris::ExecEnv::GetInstance()->set_segment_loader(new 
doris::SegmentLoader(1000));
+    doris::ExecEnv::GetInstance()->set_segment_loader(new 
doris::SegmentLoader(1000, 1000));
     std::string conf = std::string(getenv("DORIS_HOME")) + "/conf/be.conf";
     auto st = doris::config::init(conf.c_str(), false);
     doris::ExecEnv::GetInstance()->set_tablet_schema_cache(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

(doris) 21/21: [improvement](segmentcache) limit segment cache by memory or segment num (#37026)

Reply via email to