This is an automated email from the ASF dual-hosted git repository. gaodayue pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 4d1e926b6c [feature][config] introduce a new BE config storage_page_cache_shard_size (#9821) 4d1e926b6c is described below commit 4d1e926b6c8b637c4707bbcbe37042d237c081db Author: Dayue Gao <gaoda...@meituan.com> AuthorDate: Sat May 28 10:17:09 2022 +0800 [feature][config] introduce a new BE config storage_page_cache_shard_size (#9821) Co-authored-by: gaodayue <gaoda...@bytedance.com> --- be/src/common/config.h | 3 ++ be/src/olap/lru_cache.cpp | 47 +++++++++++++++++------------ be/src/olap/lru_cache.h | 21 +++++++------ be/src/olap/page_cache.cpp | 20 +++++++----- be/src/olap/page_cache.h | 7 +++-- be/src/olap/segment_loader.cpp | 2 +- be/src/runtime/exec_env_init.cpp | 5 +-- be/test/olap/page_cache_test.cpp | 8 +++-- docs/en/admin-manual/config/be-config.md | 6 ++++ docs/zh-CN/admin-manual/config/be-config.md | 6 ++++ 10 files changed, 80 insertions(+), 45 deletions(-) diff --git a/be/src/common/config.h b/be/src/common/config.h index a73efcf24d..9bdd8ba3e4 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -226,6 +226,9 @@ CONF_Int64(index_stream_cache_capacity, "10737418240"); // Cache for storage page size CONF_String(storage_page_cache_limit, "20%"); +// Shard size for page cache, the value must be power of two. +// It's recommended to set it to a value close to the number of BE cores in order to reduce lock contentions. +CONF_Int32(storage_page_cache_shard_size, "16"); // Percentage for index page cache // all storage page cache will be divided into data_page_cache and index_page_cache CONF_Int32(index_page_cache_percentage, "10"); diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp index 90b2e4ddf5..19d8b5d4e9 100644 --- a/be/src/olap/lru_cache.cpp +++ b/be/src/olap/lru_cache.cpp @@ -11,6 +11,7 @@ #include <sstream> #include <string> +#include "gutil/bits.h" #include "olap/olap_common.h" #include "olap/olap_define.h" #include "olap/olap_index.h" @@ -430,19 +431,25 @@ inline uint32_t ShardedLRUCache::_hash_slice(const CacheKey& s) { return s.hash(s.data(), s.size(), 0); } -uint32_t ShardedLRUCache::_shard(uint32_t hash) { - return hash >> (32 - kNumShardBits); -} - -ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type) +ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type, + uint32_t num_shards) : _name(name), + _num_shard_bits(Bits::FindLSBSetNonZero(num_shards)), + _num_shards(num_shards), + _shards(nullptr), _last_id(1), _mem_tracker(MemTracker::create_tracker(-1, name, nullptr, MemTrackerLevel::OVERVIEW)) { - const size_t per_shard = (total_capacity + (kNumShards - 1)) / kNumShards; - for (int s = 0; s < kNumShards; s++) { - _shards[s] = new LRUCache(type); - _shards[s]->set_capacity(per_shard); + CHECK(num_shards > 0) << "num_shards cannot be 0"; + CHECK_EQ((num_shards & (num_shards - 1)), 0) + << "num_shards should be power of two, but got " << num_shards; + + const size_t per_shard = (total_capacity + (_num_shards - 1)) / _num_shards; + LRUCache** shards = new (std::nothrow) LRUCache*[_num_shards]; + for (int s = 0; s < _num_shards; s++) { + shards[s] = new LRUCache(type); + shards[s]->set_capacity(per_shard); } + _shards = shards; _entity = DorisMetrics::instance()->metric_registry()->register_entity( std::string("lru_cache:") + name, {{"name", name}}); @@ -456,8 +463,11 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, } ShardedLRUCache::~ShardedLRUCache() { - for (int s = 0; s < kNumShards; s++) { - delete _shards[s]; + if (_shards) { + for (int s = 0; s < _num_shards; s++) { + delete _shards[s]; + } + delete[] _shards; } _entity->deregister_hook(_name); DorisMetrics::instance()->metric_registry()->deregister_entity(_entity); @@ -501,7 +511,7 @@ uint64_t ShardedLRUCache::new_id() { int64_t ShardedLRUCache::prune() { int64_t num_prune = 0; - for (int s = 0; s < kNumShards; s++) { + for (int s = 0; s < _num_shards; s++) { num_prune += _shards[s]->prune(); } return num_prune; @@ -509,7 +519,7 @@ int64_t ShardedLRUCache::prune() { int64_t ShardedLRUCache::prune_if(CacheValuePredicate pred) { int64_t num_prune = 0; - for (int s = 0; s < kNumShards; s++) { + for (int s = 0; s < _num_shards; s++) { num_prune += _shards[s]->prune_if(pred); } return num_prune; @@ -520,7 +530,7 @@ void ShardedLRUCache::update_cache_metrics() const { size_t total_usage = 0; size_t total_lookup_count = 0; size_t total_hit_count = 0; - for (int i = 0; i < kNumShards; i++) { + for (int i = 0; i < _num_shards; i++) { total_capacity += _shards[i]->get_capacity(); total_usage += _shards[i]->get_usage(); total_lookup_count += _shards[i]->get_lookup_count(); @@ -536,12 +546,9 @@ void ShardedLRUCache::update_cache_metrics() const { : ((double)total_hit_count / total_lookup_count)); } -Cache* new_lru_cache(const std::string& name, size_t capacity) { - return new ShardedLRUCache(name, capacity, LRUCacheType::SIZE); -} - -Cache* new_typed_lru_cache(const std::string& name, size_t capacity, LRUCacheType type) { - return new ShardedLRUCache(name, capacity, type); +Cache* new_lru_cache(const std::string& name, size_t capacity, LRUCacheType type, + uint32_t num_shards) { + return new ShardedLRUCache(name, capacity, type, num_shards); } } // namespace doris diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h index 6fb87744f4..4bac173fad 100644 --- a/be/src/olap/lru_cache.h +++ b/be/src/olap/lru_cache.h @@ -53,11 +53,10 @@ enum LRUCacheType { NUMBER // The capacity of cache is based on the number of cache entry. }; -// Create a new cache with a specified name and a fixed SIZE capacity. +// Create a new cache with a specified name and capacity. // This implementation of Cache uses a least-recently-used eviction policy. -extern Cache* new_lru_cache(const std::string& name, size_t capacity); - -extern Cache* new_typed_lru_cache(const std::string& name, size_t capacity, LRUCacheType type); +extern Cache* new_lru_cache(const std::string& name, size_t capacity, + LRUCacheType type = LRUCacheType::SIZE, uint32_t num_shards = 16); class CacheKey { public: @@ -356,12 +355,10 @@ private: uint64_t _hit_count = 0; // 命中cache的总次数 }; -static const int kNumShardBits = 4; -static const int kNumShards = 1 << kNumShardBits; - class ShardedLRUCache : public Cache { public: - explicit ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type); + explicit ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type, + uint32_t num_shards); // TODO(fdy): 析构时清除所有cache元素 virtual ~ShardedLRUCache(); virtual Handle* insert(const CacheKey& key, void* value, size_t charge, @@ -381,10 +378,14 @@ private: private: static uint32_t _hash_slice(const CacheKey& s); - static uint32_t _shard(uint32_t hash); + uint32_t _shard(uint32_t hash) { + return _num_shard_bits > 0 ? (hash >> (32 - _num_shard_bits)) : 0; + } std::string _name; - LRUCache* _shards[kNumShards]; + const int _num_shard_bits; + const uint32_t _num_shards; + LRUCache** _shards; std::atomic<uint64_t> _last_id; std::shared_ptr<MemTracker> _mem_tracker; diff --git a/be/src/olap/page_cache.cpp b/be/src/olap/page_cache.cpp index c1b0aac1d8..2ec540b384 100644 --- a/be/src/olap/page_cache.cpp +++ b/be/src/olap/page_cache.cpp @@ -23,26 +23,32 @@ namespace doris { StoragePageCache* StoragePageCache::_s_instance = nullptr; -void StoragePageCache::create_global_cache(size_t capacity, int32_t index_cache_percentage) { +void StoragePageCache::create_global_cache(size_t capacity, int32_t index_cache_percentage, + uint32_t num_shards) { DCHECK(_s_instance == nullptr); - static StoragePageCache instance(capacity, index_cache_percentage); + static StoragePageCache instance(capacity, index_cache_percentage, num_shards); _s_instance = &instance; } -StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percentage) +StoragePageCache::StoragePageCache(size_t capacity, int32_t index_cache_percentage, + uint32_t num_shards) : _index_cache_percentage(index_cache_percentage), _mem_tracker(MemTracker::create_tracker(capacity, "StoragePageCache", nullptr, MemTrackerLevel::OVERVIEW)) { SCOPED_SWITCH_THREAD_LOCAL_MEM_TRACKER(_mem_tracker); if (index_cache_percentage == 0) { - _data_page_cache = std::unique_ptr<Cache>(new_lru_cache("DataPageCache", capacity)); + _data_page_cache = std::unique_ptr<Cache>( + new_lru_cache("DataPageCache", capacity, LRUCacheType::SIZE, num_shards)); } else if (index_cache_percentage == 100) { - _index_page_cache = std::unique_ptr<Cache>(new_lru_cache("IndexPageCache", capacity)); + _index_page_cache = std::unique_ptr<Cache>( + new_lru_cache("IndexPageCache", capacity, LRUCacheType::SIZE, num_shards)); } else if (index_cache_percentage > 0 && index_cache_percentage < 100) { _data_page_cache = std::unique_ptr<Cache>( - new_lru_cache("DataPageCache", capacity * (100 - index_cache_percentage) / 100)); + new_lru_cache("DataPageCache", capacity * (100 - index_cache_percentage) / 100, + LRUCacheType::SIZE, num_shards)); _index_page_cache = std::unique_ptr<Cache>( - new_lru_cache("IndexPageCache", capacity * index_cache_percentage / 100)); + new_lru_cache("IndexPageCache", capacity * index_cache_percentage / 100, + LRUCacheType::SIZE, num_shards)); } else { CHECK(false) << "invalid index page cache percentage"; } diff --git a/be/src/olap/page_cache.h b/be/src/olap/page_cache.h index be5a3ffd71..f03f50bd5a 100644 --- a/be/src/olap/page_cache.h +++ b/be/src/olap/page_cache.h @@ -54,14 +54,17 @@ public: } }; + static constexpr uint32_t kDefaultNumShards = 16; + // Create global instance of this class - static void create_global_cache(size_t capacity, int32_t index_cache_percentage); + static void create_global_cache(size_t capacity, int32_t index_cache_percentage, + uint32_t num_shards = kDefaultNumShards); // Return global instance. // Client should call create_global_cache before. static StoragePageCache* instance() { return _s_instance; } - StoragePageCache(size_t capacity, int32_t index_cache_percentage); + StoragePageCache(size_t capacity, int32_t index_cache_percentage, uint32_t num_shards); // Lookup the given page in the cache. // diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp index 5b62c92e24..a105336f6a 100644 --- a/be/src/olap/segment_loader.cpp +++ b/be/src/olap/segment_loader.cpp @@ -32,7 +32,7 @@ void SegmentLoader::create_global_instance(size_t capacity) { SegmentLoader::SegmentLoader(size_t capacity) { _cache = std::unique_ptr<Cache>( - new_typed_lru_cache("SegmentLoader:SegmentCache", capacity, LRUCacheType::NUMBER)); + new_lru_cache("SegmentLoader:SegmentCache", capacity, LRUCacheType::NUMBER)); } bool SegmentLoader::_lookup(const SegmentLoader::CacheKey& key, SegmentCacheHandle* handle) { diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index ff5b847810..a1cdf32b63 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -250,8 +250,9 @@ Status ExecEnv::_init_mem_tracker() { // Reason same as buffer_pool_limit storage_cache_limit = storage_cache_limit / 2; } - int32_t index_page_cache_percentage = config::index_page_cache_percentage; - StoragePageCache::create_global_cache(storage_cache_limit, index_page_cache_percentage); + int32_t index_percentage = config::index_page_cache_percentage; + uint32_t num_shards = config::storage_page_cache_shard_size; + StoragePageCache::create_global_cache(storage_cache_limit, index_percentage, num_shards); LOG(INFO) << "Storage page cache memory limit: " << PrettyPrinter::print(storage_cache_limit, TUnit::BYTES) << ", origin config value: " << config::storage_page_cache_limit; diff --git a/be/test/olap/page_cache_test.cpp b/be/test/olap/page_cache_test.cpp index 1eca896010..73c11c39b8 100644 --- a/be/test/olap/page_cache_test.cpp +++ b/be/test/olap/page_cache_test.cpp @@ -21,6 +21,8 @@ namespace doris { +static int kNumShards = StoragePageCache::kDefaultNumShards; + class StoragePageCacheTest : public testing::Test { public: StoragePageCacheTest() {} @@ -29,7 +31,7 @@ public: // All cache space is allocated to data pages TEST(StoragePageCacheTest, data_page_only) { - StoragePageCache cache(kNumShards * 2048, 0); + StoragePageCache cache(kNumShards * 2048, 0, kNumShards); StoragePageCache::CacheKey key("abc", 0); StoragePageCache::CacheKey memory_key("mem", 0); @@ -89,7 +91,7 @@ TEST(StoragePageCacheTest, data_page_only) { // All cache space is allocated to index pages TEST(StoragePageCacheTest, index_page_only) { - StoragePageCache cache(kNumShards * 2048, 100); + StoragePageCache cache(kNumShards * 2048, 100, kNumShards); StoragePageCache::CacheKey key("abc", 0); StoragePageCache::CacheKey memory_key("mem", 0); @@ -149,7 +151,7 @@ TEST(StoragePageCacheTest, index_page_only) { // Cache space is allocated by index_page_cache_ratio TEST(StoragePageCacheTest, mixed_pages) { - StoragePageCache cache(kNumShards * 2048, 10); + StoragePageCache cache(kNumShards * 2048, 10, kNumShards); StoragePageCache::CacheKey data_key("data", 0); StoragePageCache::CacheKey index_key("index", 0); diff --git a/docs/en/admin-manual/config/be-config.md b/docs/en/admin-manual/config/be-config.md index 4bddb44111..ddb0e31f83 100644 --- a/docs/en/admin-manual/config/be-config.md +++ b/docs/en/admin-manual/config/be-config.md @@ -1136,6 +1136,12 @@ Default: 20% Cache for storage page size +### `storage_page_cache_shard_size` + +Default: 16 + +Shard size of StoragePageCache, the value must be power of two. It's recommended to set it to a value close to the number of BE cores in order to reduce lock contentions. + ### `index_page_cache_percentage` * Type: int32 * Description: Index page cache as a percentage of total storage page cache, value range is [0, 100] diff --git a/docs/zh-CN/admin-manual/config/be-config.md b/docs/zh-CN/admin-manual/config/be-config.md index 873ef1d2b8..2d6599a284 100644 --- a/docs/zh-CN/admin-manual/config/be-config.md +++ b/docs/zh-CN/admin-manual/config/be-config.md @@ -1143,6 +1143,12 @@ storage_flood_stage_usage_percent和storage_flood_stage_left_capacity_bytes两 缓存存储页大小 +### `storage_page_cache_shard_size` + +默认值: 16 + +StoragePageCache的分片大小,值为 2^n (n=0,1,2,...)。建议设置为接近BE CPU核数的值,可减少StoragePageCache的锁竞争。 + ### `index_page_cache_percentage` * 类型:int32 * 描述:索引页缓存占总页面缓存的百分比,取值为[0, 100]。 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org