github-actions[bot] commented on code in PR #16003: URL: https://github.com/apache/doris/pull/16003#discussion_r1071763565
########## be/src/olap/rowset/segment_v2/inverted_index_cache.h: ########## @@ -0,0 +1,165 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <CLucene.h> + +#include <iostream> +#include <map> +#include <memory> +#include <mutex> +#include <vector> + +#include "io/fs/file_system.h" +#include "olap/lru_cache.h" +#include "runtime/memory/mem_tracker.h" +#include "util/time.h" + +namespace doris { + +namespace segment_v2 { +using IndexSearcherPtr = std::shared_ptr<lucene::search::IndexSearcher>; + +class InvertedIndexCacheHandle; + +class InvertedIndexSearcherCache { +public: + // The cache key of index_searcher lru cache + struct CacheKey { + CacheKey(std::string index_file_path) : index_file_path(index_file_path) {} + std::string index_file_path; + }; + + // The cache value of index_searcher lru cache. + // Holding a opened index_searcher. + struct CacheValue { + // Save the last visit time of this cache entry. + // Use atomic because it may be modified by multi threads. + std::atomic<int64_t> last_visit_time = 0; + std::atomic<int64_t> first_start_time = 0; + IndexSearcherPtr index_searcher; + size_t size = 0; + }; + + // Create global instance of this class. + // "capacity" is the capacity of lru cache. + static void create_global_instance(size_t capacity); + + // Return global instance. + // Client should call create_global_cache before. + static InvertedIndexSearcherCache* instance() { return _s_instance; } + + InvertedIndexSearcherCache(size_t capacity); + + Status get_index_searcher(const io::FileSystemSPtr& fs, const std::string& index_dir, + const std::string& file_name, InvertedIndexCacheHandle* cache_handle, + bool use_cache = true); + + // Try to prune the segment cache if expired. + Status prune(); + + // function `insert` called after inverted index writer close + Status insert(const io::FileSystemSPtr& fs, const std::string& index_dir, + const std::string& file_name); + + // function `erase` called after compaction remove segment + Status erase(const std::string& index_file_path); + +private: + InvertedIndexSearcherCache(); + + // Lookup the given index_searcher in the cache. + // If the index_searcher is found, the cache entry will be written into handle. + // Return true if entry is found, otherwise return false. + bool _lookup(const InvertedIndexSearcherCache::CacheKey& key, InvertedIndexCacheHandle* handle); + + // Insert a cache entry by key. + // And the cache entry will be returned in handle. + // This function is thread-safe. + void _insert(const InvertedIndexSearcherCache::CacheKey& key, CacheValue& value, + InvertedIndexCacheHandle* handle); + +private: + static InvertedIndexSearcherCache* _s_instance; + // A LRU cache to cache all opened index_searcher + std::unique_ptr<Cache> _cache = nullptr; + std::unique_ptr<MemTracker> _mem_tracker = nullptr; +}; + +// A handle for a index_searcher from index_searcher lru cache. +// The handle can ensure that the index_searcher is valid +// and will not be closed while the holder of the handle is accessing the index_searcher. +// The handle will automatically release the cache entry when it is destroyed. +// So the caller need to make sure the handle is valid in lifecycle. +class InvertedIndexCacheHandle { +public: + InvertedIndexCacheHandle() {} + InvertedIndexCacheHandle(Cache* cache, Cache::Handle* handle) + : _cache(cache), _handle(handle) {} + + ~InvertedIndexCacheHandle() { + if (_handle != nullptr) { + CHECK(_cache != nullptr); + CHECK(!owned); + // last_visit_time is set when release. + // because it only be needed when pruning. + ((InvertedIndexSearcherCache::CacheValue*)_cache->value(_handle))->last_visit_time = + UnixMillis(); + _cache->release(_handle); + } + } + + InvertedIndexCacheHandle(InvertedIndexCacheHandle&& other) noexcept { + std::swap(_cache, other._cache); + std::swap(_handle, other._handle); + this->owned = other.owned; + this->index_searcher = std::move(other.index_searcher); + } + + InvertedIndexCacheHandle& operator=(InvertedIndexCacheHandle&& other) noexcept { + std::swap(_cache, other._cache); + std::swap(_handle, other._handle); + this->owned = other.owned; + this->index_searcher = std::move(other.index_searcher); + return *this; + } + + IndexSearcherPtr get_index_searcher() { + if (owned) { + return index_searcher; + } else { + return ((InvertedIndexSearcherCache::CacheValue*)_cache->value(_handle)) + ->index_searcher; + } + } + +public: Review Comment: warning: redundant access specifier has the same accessibility as the previous access specifier [readability-redundant-access-specifiers] ```suggestion ``` **be/src/olap/rowset/segment_v2/inverted_index_cache.h:109:** previously declared here ```cpp public: ^ ``` ########## be/src/olap/rowset/segment_v2/inverted_index_cache.cpp: ########## @@ -0,0 +1,178 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/inverted_index_cache.h" + +#include "olap/rowset/segment_v2/inverted_index_compound_directory.h" +#include "olap/rowset/segment_v2/inverted_index_compound_reader.h" + +namespace doris { +namespace segment_v2 { + +InvertedIndexSearcherCache* InvertedIndexSearcherCache::_s_instance = nullptr; + +static IndexSearcherPtr build_index_searcher(const io::FileSystemSPtr& fs, + const std::string& index_dir, + const std::string& file_name) { + DorisCompoundReader* directory = new DorisCompoundReader( + DorisCompoundDirectory::getDirectory(fs, index_dir.c_str()), file_name.c_str()); + auto closeDirectory = true; + auto index_searcher = + std::make_shared<lucene::search::IndexSearcher>(directory, closeDirectory); + // NOTE: need to cl_refcount-- here, so that directory will be deleted when + // index_searcher is destroyed + _CLDECDELETE(directory) + return index_searcher; +} + +void InvertedIndexSearcherCache::create_global_instance(size_t capacity) { + DCHECK(_s_instance == nullptr); + static InvertedIndexSearcherCache instance(capacity); + _s_instance = &instance; +} + +InvertedIndexSearcherCache::InvertedIndexSearcherCache(size_t capacity) + : _mem_tracker(std::make_unique<MemTracker>("InvertedIndexSearcherCache")) { + SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); + _cache = std::unique_ptr<Cache>( + new_lru_cache("InvertedIndexSearcher:InvertedIndexSearcherCache", capacity)); +} + +Status InvertedIndexSearcherCache::get_index_searcher(const io::FileSystemSPtr& fs, + const std::string& index_dir, + const std::string& file_name, + InvertedIndexCacheHandle* cache_handle, + bool use_cache) { + auto file_path = index_dir + "/" + file_name; + InvertedIndexSearcherCache::CacheKey cache_key(file_path); + if (_lookup(cache_key, cache_handle)) { + cache_handle->owned = false; + return Status::OK(); + } + cache_handle->owned = !use_cache; + IndexSearcherPtr index_searcher = nullptr; + auto mem_tracker = + std::unique_ptr<MemTracker>(new MemTracker("InvertedIndexSearcherCacheWithRead")); + { + SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get()); + index_searcher = build_index_searcher(fs, index_dir, file_name); + } + + if (use_cache) { + InvertedIndexSearcherCache::CacheValue* cache_value = + new InvertedIndexSearcherCache::CacheValue(); + cache_value->index_searcher = std::move(index_searcher); + cache_value->size = mem_tracker->consumption(); + _insert(cache_key, *cache_value, cache_handle); + } else { + cache_handle->index_searcher = std::move(index_searcher); + } + return Status::OK(); +} + +Status InvertedIndexSearcherCache::insert(const io::FileSystemSPtr& fs, + const std::string& index_dir, + const std::string& file_name) { + auto file_path = index_dir + "/" + file_name; + InvertedIndexSearcherCache::CacheKey cache_key(file_path); + InvertedIndexSearcherCache::CacheValue* cache_value = + new InvertedIndexSearcherCache::CacheValue(); + IndexSearcherPtr index_searcher = nullptr; + auto mem_tracker = + std::unique_ptr<MemTracker>(new MemTracker("InvertedIndexSearcherCacheWithInsert")); + { + SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get()); + index_searcher = build_index_searcher(fs, index_dir, file_name); + } + + cache_value->index_searcher = std::move(index_searcher); + cache_value->first_start_time = UnixMillis(); + cache_value->size = mem_tracker->consumption(); + + InvertedIndexCacheHandle inverted_index_cache_handle; + _insert(cache_key, *cache_value, &inverted_index_cache_handle); + return Status::OK(); +} + +Status InvertedIndexSearcherCache::erase(const std::string& index_file_path) { + InvertedIndexSearcherCache::CacheKey cache_key(index_file_path); + _cache->erase(cache_key.index_file_path); + return Status::OK(); +} + +Status InvertedIndexSearcherCache::prune() { + const int64_t curtime = UnixMillis(); + auto pred = [curtime](const void* value) -> bool { + InvertedIndexSearcherCache::CacheValue* cache_value = + (InvertedIndexSearcherCache::CacheValue*)value; + bool expired = false; + if (cache_value->first_start_time != 0) { + // add into cache after write index, but no visited in 15 minutes + auto start_expired_time = + cache_value->first_start_time + + (config::index_searcher_cache_stale_sweep_time_sec / 2) * 1000; + if (start_expired_time < curtime && cache_value->last_visit_time == 0) { + expired = true; + } + } + + if (cache_value->last_visit_time != 0) { + // no vistited in 30 minutes + auto visit_expired_time = cache_value->last_visit_time + + config::index_searcher_cache_stale_sweep_time_sec * 1000; + if (visit_expired_time < curtime) { + expired = true; + } + } + + return expired; + }; + + MonotonicStopWatch watch; + watch.start(); + int64_t prune_num = _cache->prune_if(pred); + if (prune_num > 0) Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (prune_num > 0) { ``` be/src/olap/rowset/segment_v2/inverted_index_cache.cpp:149: ```diff - << watch.elapsed_time() / 1000 / 1000; + << watch.elapsed_time() / 1000 / 1000; + } ``` ########## be/src/olap/rowset/segment_v2/inverted_index_cache.h: ########## @@ -0,0 +1,165 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <CLucene.h> + +#include <iostream> +#include <map> +#include <memory> +#include <mutex> +#include <vector> + +#include "io/fs/file_system.h" +#include "olap/lru_cache.h" +#include "runtime/memory/mem_tracker.h" +#include "util/time.h" + +namespace doris { + +namespace segment_v2 { +using IndexSearcherPtr = std::shared_ptr<lucene::search::IndexSearcher>; + +class InvertedIndexCacheHandle; + +class InvertedIndexSearcherCache { +public: + // The cache key of index_searcher lru cache + struct CacheKey { + CacheKey(std::string index_file_path) : index_file_path(index_file_path) {} + std::string index_file_path; + }; + + // The cache value of index_searcher lru cache. + // Holding a opened index_searcher. + struct CacheValue { + // Save the last visit time of this cache entry. + // Use atomic because it may be modified by multi threads. + std::atomic<int64_t> last_visit_time = 0; + std::atomic<int64_t> first_start_time = 0; + IndexSearcherPtr index_searcher; + size_t size = 0; + }; + + // Create global instance of this class. + // "capacity" is the capacity of lru cache. + static void create_global_instance(size_t capacity); + + // Return global instance. + // Client should call create_global_cache before. + static InvertedIndexSearcherCache* instance() { return _s_instance; } + + InvertedIndexSearcherCache(size_t capacity); + + Status get_index_searcher(const io::FileSystemSPtr& fs, const std::string& index_dir, + const std::string& file_name, InvertedIndexCacheHandle* cache_handle, + bool use_cache = true); + + // Try to prune the segment cache if expired. + Status prune(); + + // function `insert` called after inverted index writer close + Status insert(const io::FileSystemSPtr& fs, const std::string& index_dir, + const std::string& file_name); + + // function `erase` called after compaction remove segment + Status erase(const std::string& index_file_path); + +private: + InvertedIndexSearcherCache(); + + // Lookup the given index_searcher in the cache. + // If the index_searcher is found, the cache entry will be written into handle. + // Return true if entry is found, otherwise return false. + bool _lookup(const InvertedIndexSearcherCache::CacheKey& key, InvertedIndexCacheHandle* handle); + + // Insert a cache entry by key. + // And the cache entry will be returned in handle. + // This function is thread-safe. + void _insert(const InvertedIndexSearcherCache::CacheKey& key, CacheValue& value, + InvertedIndexCacheHandle* handle); + +private: Review Comment: warning: redundant access specifier has the same accessibility as the previous access specifier [readability-redundant-access-specifiers] ```suggestion ``` **be/src/olap/rowset/segment_v2/inverted_index_cache.h:82:** previously declared here ```cpp private: ^ ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org