This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 3df52cbf7610dc662ec525281b3a04eba360d219 Author: Yongqiang YANG <98214048+dataroar...@users.noreply.github.com> AuthorDate: Sun Jun 30 12:57:14 2024 +0800 [improvement](segmentcache) limit segment cache by memory or segment num (#37026) Also enlarge columns per segment. --- be/src/common/config.cpp | 2 +- be/src/olap/lru_cache.cpp | 5 +++++ be/src/olap/lru_cache.h | 4 +++- be/src/olap/segment_loader.h | 13 ++++++++----- be/src/runtime/exec_env_init.cpp | 8 ++++---- be/test/testutil/run_all_tests.cpp | 2 +- 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 580793d36ab..9df75b97bd6 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1068,7 +1068,7 @@ DEFINE_mInt32(schema_cache_sweep_time_sec, "100"); // max number of segment cache, default -1 for backward compatibility fd_number*2/5 DEFINE_mInt32(segment_cache_capacity, "-1"); -DEFINE_mInt32(estimated_num_columns_per_segment, "30"); +DEFINE_mInt32(estimated_num_columns_per_segment, "200"); DEFINE_mInt32(estimated_mem_per_column_reader, "1024"); // The value is calculate by storage_page_cache_limit * index_page_cache_percentage DEFINE_mInt32(segment_cache_memory_percentage, "2"); diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp index 031082f6da8..741c2423915 100644 --- a/be/src/olap/lru_cache.cpp +++ b/be/src/olap/lru_cache.cpp @@ -22,6 +22,7 @@ namespace doris { DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_capacity, MetricUnit::BYTES); DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage, MetricUnit::BYTES); +DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_element_count, MetricUnit::NOUNIT); DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(cache_usage_ratio, MetricUnit::NOUNIT); DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_lookup_count, MetricUnit::OPERATIONS); DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(cache_hit_count, MetricUnit::OPERATIONS); @@ -542,6 +543,7 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, _entity->register_hook(name, std::bind(&ShardedLRUCache::update_cache_metrics, this)); INT_GAUGE_METRIC_REGISTER(_entity, cache_capacity); INT_GAUGE_METRIC_REGISTER(_entity, cache_usage); + INT_GAUGE_METRIC_REGISTER(_entity, cache_element_count); INT_DOUBLE_METRIC_REGISTER(_entity, cache_usage_ratio); INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_lookup_count); INT_ATOMIC_COUNTER_METRIC_REGISTER(_entity, cache_hit_count); @@ -640,15 +642,18 @@ void ShardedLRUCache::update_cache_metrics() const { size_t total_usage = 0; size_t total_lookup_count = 0; size_t total_hit_count = 0; + size_t total_element_count = 0; for (int i = 0; i < _num_shards; i++) { total_capacity += _shards[i]->get_capacity(); total_usage += _shards[i]->get_usage(); total_lookup_count += _shards[i]->get_lookup_count(); total_hit_count += _shards[i]->get_hit_count(); + total_element_count += _shards[i]->get_element_count(); } cache_capacity->set_value(total_capacity); cache_usage->set_value(total_usage); + cache_element_count->set_value(total_element_count); cache_lookup_count->set_value(total_lookup_count); cache_hit_count->set_value(total_hit_count); cache_usage_ratio->set_value(total_capacity == 0 ? 0 : ((double)total_usage / total_capacity)); diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h index 50676921044..059020deab5 100644 --- a/be/src/olap/lru_cache.h +++ b/be/src/olap/lru_cache.h @@ -60,7 +60,7 @@ enum LRUCacheType { }; static constexpr LRUCacheType DEFAULT_LRU_CACHE_TYPE = LRUCacheType::SIZE; -static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 16; +static constexpr uint32_t DEFAULT_LRU_CACHE_NUM_SHARDS = 32; static constexpr size_t DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY = 0; class CacheKey { @@ -349,6 +349,7 @@ public: uint64_t get_hit_count() const { return _hit_count; } size_t get_usage() const { return _usage; } size_t get_capacity() const { return _capacity; } + size_t get_element_count() const { return _table.element_count(); } private: void _lru_remove(LRUHandle* e); @@ -433,6 +434,7 @@ private: std::shared_ptr<MetricEntity> _entity; IntGauge* cache_capacity = nullptr; IntGauge* cache_usage = nullptr; + IntGauge* cache_element_count = nullptr; DoubleGauge* cache_usage_ratio = nullptr; IntAtomicCounter* cache_lookup_count = nullptr; IntAtomicCounter* cache_hit_count = nullptr; diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h index 4d1f3f7a910..5bb8fae3c41 100644 --- a/be/src/olap/segment_loader.h +++ b/be/src/olap/segment_loader.h @@ -80,10 +80,11 @@ public: segment_v2::SegmentSharedPtr segment; }; - SegmentCache(size_t capacity) - : LRUCachePolicyTrackingManual(CachePolicy::CacheType::SEGMENT_CACHE, capacity, - LRUCacheType::SIZE, - config::tablet_rowset_stale_sweep_time_sec) {} + SegmentCache(size_t memory_bytes_limit, size_t segment_num_limit) + : LRUCachePolicyTrackingManual(CachePolicy::CacheType::SEGMENT_CACHE, + memory_bytes_limit, LRUCacheType::SIZE, + config::tablet_rowset_stale_sweep_time_sec, + DEFAULT_LRU_CACHE_NUM_SHARDS * 2, segment_num_limit) {} // Lookup the given segment in the cache. // If the segment is found, the cache entry will be written into handle. @@ -110,7 +111,9 @@ public: // After the estimation of segment memory usage is provided later, it is recommended // to use Memory as the capacity limit of the cache. - SegmentLoader(size_t capacity) { _segment_cache = std::make_unique<SegmentCache>(capacity); } + SegmentLoader(size_t memory_limit_bytes, size_t segment_num_count) { + _segment_cache = std::make_unique<SegmentCache>(memory_limit_bytes, segment_num_count); + } // Load segments of "rowset", return the "cache_handle" which contains segments. // If use_cache is true, it will be loaded from _cache. diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 6c8ac1e4aa9..bbf012b3a63 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -515,8 +515,8 @@ Status ExecEnv::_init_mem_env() { // SegmentLoader caches segments in rowset granularity. So the size of // opened files will greater than segment_cache_capacity. int64_t segment_cache_capacity = config::segment_cache_capacity; - if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 2 / 5) { - segment_cache_capacity = fd_number * 2 / 5; + if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 1 / 5) { + segment_cache_capacity = fd_number * 1 / 5; } int64_t segment_cache_mem_limit = @@ -526,8 +526,8 @@ Status ExecEnv::_init_mem_env() { min(segment_cache_mem_limit, segment_cache_capacity * config::estimated_num_columns_per_segment * config::estimated_mem_per_column_reader); - _segment_loader = new SegmentLoader(min_segment_cache_mem_limit); - LOG(INFO) << "segment_cache_capacity <= fd_number * 2 / 5, fd_number: " << fd_number + _segment_loader = new SegmentLoader(min_segment_cache_mem_limit, segment_cache_capacity); + LOG(INFO) << "segment_cache_capacity <= fd_number * 1 / 5, fd_number: " << fd_number << " segment_cache_capacity: " << segment_cache_capacity << " min_segment_cache_mem_limit " << min_segment_cache_mem_limit; diff --git a/be/test/testutil/run_all_tests.cpp b/be/test/testutil/run_all_tests.cpp index 68b7352419c..1cf749d0f7f 100644 --- a/be/test/testutil/run_all_tests.cpp +++ b/be/test/testutil/run_all_tests.cpp @@ -53,7 +53,7 @@ int main(int argc, char** argv) { doris::ExecEnv::GetInstance()->set_dummy_lru_cache(std::make_shared<doris::DummyLRUCache>()); doris::ExecEnv::GetInstance()->set_storage_page_cache( doris::StoragePageCache::create_global_cache(1 << 30, 10, 0)); - doris::ExecEnv::GetInstance()->set_segment_loader(new doris::SegmentLoader(1000)); + doris::ExecEnv::GetInstance()->set_segment_loader(new doris::SegmentLoader(1000, 1000)); std::string conf = std::string(getenv("DORIS_HOME")) + "/conf/be.conf"; auto st = doris::config::init(conf.c_str(), false); doris::ExecEnv::GetInstance()->set_tablet_schema_cache( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org