This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 3fdabc00c41 [Refact](inverted index) refact inverted index cache to decouple with reader (#30574) 3fdabc00c41 is described below commit 3fdabc00c41387280bc888e5a11a0d12000b8ae5 Author: airborne12 <airborn...@gmail.com> AuthorDate: Thu Feb 1 11:27:03 2024 +0800 [Refact](inverted index) refact inverted index cache to decouple with reader (#30574) --- .../inverted_index/query/query_factory.h | 2 +- .../rowset/segment_v2/inverted_index_cache.cpp | 229 +-------- .../olap/rowset/segment_v2/inverted_index_cache.h | 87 +--- .../rowset/segment_v2/inverted_index_query_type.h | 15 + .../rowset/segment_v2/inverted_index_reader.cpp | 566 ++++++++------------- .../olap/rowset/segment_v2/inverted_index_reader.h | 51 +- .../rowset/segment_v2/inverted_index_searcher.cpp | 112 ++++ .../rowset/segment_v2/inverted_index_searcher.h | 70 +++ .../rowset/segment_v2/inverted_index_writer.cpp | 18 +- .../inverted_index_searcher_cache_test.cpp | 250 ++++----- 10 files changed, 600 insertions(+), 800 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/query_factory.h b/be/src/olap/rowset/segment_v2/inverted_index/query/query_factory.h index 6de8a7e4c25..a24a1379396 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/query_factory.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/query_factory.h @@ -34,8 +34,8 @@ public: static std::unique_ptr<Query> create(InvertedIndexQueryType query_type, Args&&... args) { switch (query_type) { case InvertedIndexQueryType::MATCH_ANY_QUERY: - case InvertedIndexQueryType::EQUAL_QUERY: return std::make_unique<DisjunctionQuery>(std::forward<Args>(args)...); + case InvertedIndexQueryType::EQUAL_QUERY: case InvertedIndexQueryType::MATCH_ALL_QUERY: return std::make_unique<ConjunctionQuery>(std::forward<Args>(args)...); case InvertedIndexQueryType::MATCH_PHRASE_QUERY: diff --git a/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp b/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp index d424d841694..139ad9bd906 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_cache.cpp @@ -42,53 +42,6 @@ namespace doris::segment_v2 { -Status FulltextIndexSearcherBuilder::build(DorisCompoundReader* directory, - OptionalIndexSearcherPtr& output_searcher) { - auto close_directory = true; - lucene::index::IndexReader* reader = nullptr; - try { - reader = lucene::index::IndexReader::open( - directory, config::inverted_index_read_buffer_size, close_directory); - } catch (const CLuceneError& e) { - if (reader) { - reader->close(); - } - return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( - "FulltextIndexSearcherBuilder build error: {}", e.what()); - } - bool close_reader = true; - auto index_searcher = std::make_shared<lucene::search::IndexSearcher>(reader, close_reader); - if (!index_searcher) { - output_searcher = std::nullopt; - return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( - "FulltextIndexSearcherBuilder build index_searcher error."); - } - // NOTE: need to cl_refcount-- here, so that directory will be deleted when - // index_searcher is destroyed - _CLDECDELETE(directory) - output_searcher = index_searcher; - return Status::OK(); -} - -Status BKDIndexSearcherBuilder::build(DorisCompoundReader* directory, - OptionalIndexSearcherPtr& output_searcher) { - try { - auto close_directory = true; - auto bkd_reader = - std::make_shared<lucene::util::bkd::bkd_reader>(directory, close_directory); - if (!bkd_reader->open()) { - LOG(INFO) << "bkd index file " << directory->getPath() + "/" + directory->getFileName() - << " is empty"; - } - output_searcher = bkd_reader; - _CLDECDELETE(directory) - return Status::OK(); - } catch (const CLuceneError& e) { - return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( - "BKDIndexSearcherBuilder build error: {}", e.what()); - } -} - InvertedIndexSearcherCache* InvertedIndexSearcherCache::create_global_instance( size_t capacity, uint32_t num_shards) { return new InvertedIndexSearcherCache(capacity, num_shards); @@ -125,172 +78,6 @@ InvertedIndexSearcherCache::InvertedIndexSearcherCache(size_t capacity, uint32_t } } -Status InvertedIndexSearcherCache::get_index_searcher( - const io::FileSystemSPtr& fs, const std::string& index_dir, const std::string& file_name, - InvertedIndexCacheHandle* cache_handle, OlapReaderStatistics* stats, - InvertedIndexReaderType reader_type, bool& has_null, bool use_cache) { - auto file_path = index_dir + "/" + file_name; - - using namespace std::chrono; - auto start_time = steady_clock::now(); - Defer cost {[&]() { - int64_t cost = duration_cast<microseconds>(steady_clock::now() - start_time).count(); - VLOG_DEBUG << "finish get_index_searcher for " << file_path << ", cost=" << cost << "us"; - }}; - - InvertedIndexSearcherCache::CacheKey cache_key(file_path); - if (_lookup(cache_key, cache_handle)) { - cache_handle->owned = false; - has_null = cache_handle->has_null; - return Status::OK(); - } - - cache_handle->owned = !use_cache; - IndexSearcherPtr index_searcher; - std::unique_ptr<IndexSearcherBuilder> index_builder = nullptr; - auto mem_tracker = std::make_unique<MemTracker>("InvertedIndexSearcherCacheWithRead"); -#ifndef BE_TEST - { - bool exists = false; - RETURN_IF_ERROR(fs->exists(file_path, &exists)); - if (!exists) { - LOG(WARNING) << "inverted index: " << file_path << " not exist."; - return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( - "inverted index input file {} not found", file_path); - } - SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer); - SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get()); - switch (reader_type) { - case InvertedIndexReaderType::STRING_TYPE: - case InvertedIndexReaderType::FULLTEXT: { - index_builder = std::make_unique<FulltextIndexSearcherBuilder>(); - break; - } - case InvertedIndexReaderType::BKD: { - index_builder = std::make_unique<BKDIndexSearcherBuilder>(); - break; - } - - default: - LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type) - << " is not support for InvertedIndexSearcherCache"; - return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( - "InvertedIndexSearcherCache do not support reader type."); - } - // During the process of opening the index, write the file information read to the idx file cache. - bool open_idx_file_cache = true; - auto* directory = new DorisCompoundReader( - DorisCompoundDirectoryFactory::getDirectory(fs, index_dir.c_str()), - file_name.c_str(), config::inverted_index_read_buffer_size, open_idx_file_cache); - auto null_bitmap_file_name = InvertedIndexDescriptor::get_temporary_null_bitmap_file_name(); - if (!directory->fileExists(null_bitmap_file_name.c_str())) { - has_null = false; - cache_handle->has_null = false; - } else { - // roaring bitmap cookie header size is 5 - if (directory->fileLength(null_bitmap_file_name.c_str()) <= 5) { - has_null = false; - cache_handle->has_null = false; - } - } - OptionalIndexSearcherPtr result; - auto st = index_builder->build(directory, result); - if (!st.ok()) { - _CLDECDELETE(directory) - return st; - } - if (!result.has_value()) { - _CLDECDELETE(directory) - LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type) - << " build for InvertedIndexSearcherCache error"; - return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( - "InvertedIndexSearcherCache build error."); - } - directory->getDorisIndexInput()->setIdxFileCache(false); - index_searcher = *result; - } -#endif - - if (use_cache) { - IndexCacheValuePtr cache_value = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - cache_value->index_searcher = std::move(index_searcher); - cache_value->size = mem_tracker->consumption(); - *cache_handle = InvertedIndexCacheHandle(_policy->cache(), - _insert(cache_key, cache_value.release())); - } else { - cache_handle->index_searcher = std::move(index_searcher); - } - return Status::OK(); -} - -Status InvertedIndexSearcherCache::insert(const io::FileSystemSPtr& fs, - const std::string& index_dir, - const std::string& file_name, - InvertedIndexReaderType reader_type) { - auto file_path = index_dir + "/" + file_name; - - using namespace std::chrono; - auto start_time = steady_clock::now(); - Defer cost {[&]() { - int64_t cost = duration_cast<microseconds>(steady_clock::now() - start_time).count(); - VLOG_DEBUG << "finish insert index_searcher for " << file_path << ", cost=" << cost << "us"; - }}; - - InvertedIndexSearcherCache::CacheKey cache_key(file_path); - IndexCacheValuePtr cache_value = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - IndexSearcherPtr index_searcher; - std::unique_ptr<IndexSearcherBuilder> builder = nullptr; - auto mem_tracker = std::make_unique<MemTracker>("InvertedIndexSearcherCacheWithInsert"); -#ifndef BE_TEST - { - bool exists = false; - RETURN_IF_ERROR(fs->exists(file_path, &exists)); - if (!exists) { - LOG(WARNING) << "inverted index: " << file_path << " not exist."; - return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( - "inverted index input file {} not found", file_path); - } - SCOPED_CONSUME_MEM_TRACKER(mem_tracker.get()); - switch (reader_type) { - case InvertedIndexReaderType::STRING_TYPE: - case InvertedIndexReaderType::FULLTEXT: { - builder = std::make_unique<FulltextIndexSearcherBuilder>(); - break; - } - case InvertedIndexReaderType::BKD: { - builder = std::make_unique<BKDIndexSearcherBuilder>(); - break; - } - - default: - LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type) - << " is not support for InvertedIndexSearcherCache"; - return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( - "InvertedIndexSearcherCache do not support reader type."); - } - auto* directory = new DorisCompoundReader( - DorisCompoundDirectoryFactory::getDirectory(fs, index_dir.c_str()), - file_name.c_str(), config::inverted_index_read_buffer_size); - OptionalIndexSearcherPtr result; - RETURN_IF_ERROR(builder->build(directory, result)); - if (!result.has_value()) { - LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type) - << " build for InvertedIndexSearcherCache error"; - return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( - "InvertedIndexSearcherCache build error."); - } - index_searcher = *result; - } -#endif - - cache_value->index_searcher = std::move(index_searcher); - cache_value->size = mem_tracker->consumption(); - cache_value->last_visit_time = UnixMillis(); - auto* lru_handle = _insert(cache_key, cache_value.release()); - _policy->cache()->release(lru_handle); - return Status::OK(); -} - Status InvertedIndexSearcherCache::erase(const std::string& index_file_path) { InvertedIndexSearcherCache::CacheKey cache_key(index_file_path); _policy->cache()->erase(cache_key.index_file_path); @@ -301,8 +88,8 @@ int64_t InvertedIndexSearcherCache::mem_consumption() { return _policy->cache()->mem_consumption(); } -bool InvertedIndexSearcherCache::_lookup(const InvertedIndexSearcherCache::CacheKey& key, - InvertedIndexCacheHandle* handle) { +bool InvertedIndexSearcherCache::lookup(const InvertedIndexSearcherCache::CacheKey& key, + InvertedIndexCacheHandle* handle) { auto* lru_handle = _policy->cache()->lookup(key.index_file_path); if (lru_handle == nullptr) { return false; @@ -311,6 +98,18 @@ bool InvertedIndexSearcherCache::_lookup(const InvertedIndexSearcherCache::Cache return true; } +void InvertedIndexSearcherCache::insert(const InvertedIndexSearcherCache::CacheKey& cache_key, + CacheValue* cache_value) { + auto* lru_handle = _insert(cache_key, cache_value); + release(lru_handle); +} + +void InvertedIndexSearcherCache::insert(const InvertedIndexSearcherCache::CacheKey& cache_key, + CacheValue* cache_value, InvertedIndexCacheHandle* handle) { + auto* lru_handle = _insert(cache_key, cache_value); + *handle = InvertedIndexCacheHandle(_policy->cache(), lru_handle); +} + Cache::Handle* InvertedIndexSearcherCache::_insert(const InvertedIndexSearcherCache::CacheKey& key, CacheValue* value) { auto deleter = [](const doris::CacheKey& key, void* value) { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_cache.h b/be/src/olap/rowset/segment_v2/inverted_index_cache.h index 51791f0e10a..503ff5fe5c7 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_cache.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_cache.h @@ -46,6 +46,7 @@ #include "io/fs/path.h" #include "olap/lru_cache.h" #include "olap/rowset/segment_v2/inverted_index_query_type.h" +#include "olap/rowset/segment_v2/inverted_index_searcher.h" #include "runtime/exec_env.h" #include "runtime/memory/lru_cache_policy.h" #include "runtime/memory/mem_tracker.h" @@ -56,44 +57,20 @@ namespace lucene { namespace search { class IndexSearcher; } // namespace search -namespace util { -namespace bkd { + +namespace util::bkd { class bkd_reader; } -} // namespace util + } // namespace lucene namespace doris { struct OlapReaderStatistics; namespace segment_v2 { -using FulltextIndexSearcherPtr = std::shared_ptr<lucene::search::IndexSearcher>; -using BKDIndexSearcherPtr = std::shared_ptr<lucene::util::bkd::bkd_reader>; -using IndexSearcherPtr = std::variant<FulltextIndexSearcherPtr, BKDIndexSearcherPtr>; -using OptionalIndexSearcherPtr = std::optional<IndexSearcherPtr>; - class InvertedIndexCacheHandle; class DorisCompoundReader; -class IndexSearcherBuilder { -public: - virtual Status build(DorisCompoundReader* directory, - OptionalIndexSearcherPtr& output_searcher) = 0; - virtual ~IndexSearcherBuilder() = default; -}; - -class FulltextIndexSearcherBuilder : public IndexSearcherBuilder { -public: - Status build(DorisCompoundReader* directory, - OptionalIndexSearcherPtr& output_searcher) override; -}; - -class BKDIndexSearcherBuilder : public IndexSearcherBuilder { -public: - Status build(DorisCompoundReader* directory, - OptionalIndexSearcherPtr& output_searcher) override; -}; - class InvertedIndexSearcherCache { public: // The cache key of index_searcher lru cache @@ -106,8 +83,14 @@ public: // Holding an opened index_searcher. struct CacheValue : public LRUCacheValueBase { IndexSearcherPtr index_searcher; - }; + CacheValue() = default; + explicit CacheValue(IndexSearcherPtr searcher, size_t mem_size, int64_t visit_time) + : index_searcher(std::move(searcher)) { + size = mem_size; + last_visit_time = visit_time; + } + }; // Create global instance of this class. // "capacity" is the capacity of lru cache. static InvertedIndexSearcherCache* create_global_instance(size_t capacity, @@ -121,14 +104,15 @@ public: InvertedIndexSearcherCache(size_t capacity, uint32_t num_shards); - Status get_index_searcher(const io::FileSystemSPtr& fs, const std::string& index_dir, - const std::string& file_name, InvertedIndexCacheHandle* cache_handle, - OlapReaderStatistics* stats, InvertedIndexReaderType reader_type, - bool& has_null, bool use_cache = true); + void insert(const InvertedIndexSearcherCache::CacheKey& cache_key, CacheValue* cache_value); - // function `insert` called after inverted index writer close - Status insert(const io::FileSystemSPtr& fs, const std::string& index_dir, - const std::string& file_name, InvertedIndexReaderType reader_type); + void insert(const InvertedIndexSearcherCache::CacheKey& cache_key, CacheValue* cache_value, + InvertedIndexCacheHandle* handle); + + // Lookup the given index_searcher in the cache. + // If the index_searcher is found, the cache entry will be written into handle. + // Return true if entry is found, otherwise return false. + bool lookup(const InvertedIndexSearcherCache::CacheKey& key, InvertedIndexCacheHandle* handle); // function `erase` called after compaction remove segment Status erase(const std::string& index_file_path); @@ -138,7 +122,7 @@ public: int64_t mem_consumption(); private: - InvertedIndexSearcherCache(); + InvertedIndexSearcherCache() = default; class InvertedIndexSearcherCachePolicy : public LRUCachePolicy { public: @@ -158,12 +142,6 @@ private: element_count_capacity, cache_value_time_extractor, cache_value_check_timestamp, true) {} }; - - // Lookup the given index_searcher in the cache. - // If the index_searcher is found, the cache entry will be written into handle. - // Return true if entry is found, otherwise return false. - bool _lookup(const InvertedIndexSearcherCache::CacheKey& key, InvertedIndexCacheHandle* handle); - // Insert a cache entry by key. // And the cache entry will be returned in handle. // This function is thread-safe. @@ -181,14 +159,13 @@ using IndexCacheValuePtr = std::unique_ptr<InvertedIndexSearcherCache::CacheValu // So the caller need to make sure the handle is valid in lifecycle. class InvertedIndexCacheHandle { public: - InvertedIndexCacheHandle() {} + InvertedIndexCacheHandle() = default; InvertedIndexCacheHandle(Cache* cache, Cache::Handle* handle) : _cache(cache), _handle(handle) {} ~InvertedIndexCacheHandle() { if (_handle != nullptr) { CHECK(_cache != nullptr); - CHECK(!owned); // only after get_index_searcher call this destructor will // add `config::index_cache_entry_stay_time_after_lookup_s` on last_visit_time, // this is to extend the retention time of the entries hit by lookup. @@ -201,33 +178,21 @@ public: InvertedIndexCacheHandle(InvertedIndexCacheHandle&& other) noexcept { std::swap(_cache, other._cache); std::swap(_handle, other._handle); - this->owned = other.owned; - this->index_searcher = std::move(other.index_searcher); } InvertedIndexCacheHandle& operator=(InvertedIndexCacheHandle&& other) noexcept { std::swap(_cache, other._cache); std::swap(_handle, other._handle); - this->owned = other.owned; - this->index_searcher = std::move(other.index_searcher); return *this; } IndexSearcherPtr get_index_searcher() { - if (owned) { - return index_searcher; - } else { - return ((InvertedIndexSearcherCache::CacheValue*)_cache->value(_handle)) - ->index_searcher; - } + return ((InvertedIndexSearcherCache::CacheValue*)_cache->value(_handle))->index_searcher; } -public: - // If set to true, the loaded index_searcher will be saved in index_searcher, not in lru cache; - bool owned = false; - // If index searcher include non-null bitmap. - bool has_null = true; - IndexSearcherPtr index_searcher; + InvertedIndexSearcherCache::CacheValue* get_index_cache_value() { + return ((InvertedIndexSearcherCache::CacheValue*)_cache->value(_handle)); + } private: Cache* _cache = nullptr; @@ -298,7 +263,7 @@ public: class InvertedIndexQueryCacheHandle { public: - InvertedIndexQueryCacheHandle() {} + InvertedIndexQueryCacheHandle() = default; InvertedIndexQueryCacheHandle(Cache* cache, Cache::Handle* handle) : _cache(cache), _handle(handle) {} diff --git a/be/src/olap/rowset/segment_v2/inverted_index_query_type.h b/be/src/olap/rowset/segment_v2/inverted_index_query_type.h index 844cec27b46..495c03b8637 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_query_type.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_query_type.h @@ -80,6 +80,21 @@ enum class InvertedIndexQueryType { MATCH_REGEXP_QUERY = 9, }; +inline bool is_range_query(InvertedIndexQueryType query_type) { + return (query_type == InvertedIndexQueryType::GREATER_THAN_QUERY || + query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY || + query_type == InvertedIndexQueryType::LESS_THAN_QUERY || + query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY); +} + +inline bool is_match_query(InvertedIndexQueryType query_type) { + return (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY || + query_type == InvertedIndexQueryType::MATCH_ALL_QUERY || + query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY || + query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY || + query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY); +} + inline std::string query_type_to_string(InvertedIndexQueryType query_type) { switch (query_type) { case InvertedIndexQueryType::UNKNOWN_QUERY: { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 34bb42eac67..919670d2182 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -24,7 +24,6 @@ #include <CLucene/debug/mem.h> #include <CLucene/index/Term.h> #include <CLucene/search/IndexSearcher.h> -#include <CLucene/search/PhraseQuery.h> #include <CLucene/search/Query.h> #include <CLucene/search/RangeQuery.h> #include <CLucene/search/TermQuery.h> @@ -36,6 +35,7 @@ #include <CLucene/util/stringUtil.h> #include <string.h> +#include <memory> #include <ostream> #include <roaring/roaring.hh> #include <set> @@ -65,36 +65,14 @@ #include "olap/rowset/segment_v2/inverted_index/query/regexp_query.h" #include "olap/rowset/segment_v2/inverted_index_cache.h" #include "olap/rowset/segment_v2/inverted_index_compound_directory.h" +#include "olap/rowset/segment_v2/inverted_index_searcher.h" #include "olap/types.h" #include "runtime/runtime_state.h" #include "util/faststring.h" #include "util/runtime_profile.h" -#include "util/time.h" #include "vec/common/string_ref.h" -namespace doris { -namespace segment_v2 { - -bool InvertedIndexReader::_is_range_query(InvertedIndexQueryType query_type) { - return (query_type == InvertedIndexQueryType::GREATER_THAN_QUERY || - query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY || - query_type == InvertedIndexQueryType::LESS_THAN_QUERY || - query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY); -} - -bool InvertedIndexReader::_is_match_query(InvertedIndexQueryType query_type) { - return (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY || - query_type == InvertedIndexQueryType::MATCH_ALL_QUERY || - query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY || - query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY || - query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY); -} - -bool InvertedIndexReader::indexExists(io::Path& index_file_path) { - bool exists = false; - RETURN_IF_ERROR(_fs->exists(index_file_path, &exists)); - return exists; -} +namespace doris::segment_v2 { std::unique_ptr<lucene::analysis::Analyzer> InvertedIndexReader::create_analyzer( InvertedIndexCtx* inverted_index_ctx) { @@ -182,7 +160,7 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach auto index_file_path = index_dir / index_file_name; InvertedIndexQueryCache::CacheKey cache_key { index_file_path, "", InvertedIndexQueryType::UNKNOWN_QUERY, "null_bitmap"}; - auto cache = InvertedIndexQueryCache::instance(); + auto* cache = InvertedIndexQueryCache::instance(); if (cache->lookup(cache_key, cache_handle)) { return Status::OK(); } @@ -223,6 +201,55 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach return Status::OK(); } +Status InvertedIndexReader::handle_searcher_cache( + InvertedIndexCacheHandle* inverted_index_cache_handle, OlapReaderStatistics* stats) { + auto index_file_path = _index_dir / _index_file_name; + InvertedIndexSearcherCache::CacheKey searcher_cache_key(index_file_path.native()); + if (InvertedIndexSearcherCache::instance()->lookup(searcher_cache_key, + inverted_index_cache_handle)) { + return Status::OK(); + } else { + // searcher cache miss + auto mem_tracker = std::make_unique<MemTracker>("InvertedIndexSearcherCacheWithRead"); + SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer); + IndexSearcherPtr searcher; + RETURN_IF_ERROR(create_index_searcher(&searcher, _fs, _index_dir, _index_file_name, + mem_tracker.get(), type())); + auto* cache_value = new InvertedIndexSearcherCache::CacheValue( + std::move(searcher), mem_tracker->consumption(), UnixMillis()); + InvertedIndexSearcherCache::instance()->insert(searcher_cache_key, cache_value, + inverted_index_cache_handle); + return Status::OK(); + } +} + +Status InvertedIndexReader::create_index_searcher(IndexSearcherPtr* searcher, io::FileSystemSPtr fs, + const io::Path& index_dir, + const std::string& index_file_name, + MemTracker* mem_tracker, + InvertedIndexReaderType reader_type) { + auto index_file_path = index_dir / index_file_name; + bool exists = false; + RETURN_IF_ERROR(fs->exists(index_file_path, &exists)); + if (!exists) { + LOG(WARNING) << "inverted index: " << index_file_path << " not exist."; + return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( + "inverted index input file {} not found", index_file_path.native()); + } + SCOPED_CONSUME_MEM_TRACKER(mem_tracker); + bool open_idx_file_cache = true; + auto* directory = new DorisCompoundReader( + DorisCompoundDirectoryFactory::getDirectory(fs, index_dir.c_str()), + index_file_name.c_str(), config::inverted_index_read_buffer_size, open_idx_file_cache); + + auto index_searcher_builder = + DORIS_TRY(IndexSearcherBuilder::create_index_searcher_builder(reader_type)); + + auto searcher_result = DORIS_TRY(index_searcher_builder->get_index_searcher(directory)); + *searcher = searcher_result; + return Status::OK(); +}; + Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state, std::unique_ptr<InvertedIndexIterator>* iterator) { *iterator = InvertedIndexIterator::create_unique(stats, runtime_state, shared_from_this()); @@ -238,24 +265,19 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run LOG(INFO) << column_name << " begin to search the fulltext index from clucene, query_str [" << search_str << "]"; - io::Path path(_path); - auto index_dir = path.parent_path(); - auto index_file_name = InvertedIndexDescriptor::get_index_file_name( - path.filename(), _index_meta.index_id(), _index_meta.get_index_suffix()); - auto index_file_path = index_dir / index_file_name; - try { std::vector<std::string> analyse_result; + InvertedIndexQueryCache::CacheKey cache_key; + if (query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) { + cache_key = {_index_dir / _index_file_name, column_name, query_type, search_str}; analyse_result.emplace_back(search_str); } else { - InvertedIndexCtxSPtr inverted_index_ctx = std::make_shared<InvertedIndexCtx>(); - inverted_index_ctx->parser_type = get_inverted_index_parser_type_from_string( - get_parser_string_from_properties(_index_meta.properties())); - inverted_index_ctx->parser_mode = - get_parser_mode_string_from_properties(_index_meta.properties()); - inverted_index_ctx->char_filter_map = - get_parser_char_filter_map_from_properties(_index_meta.properties()); + InvertedIndexCtxSPtr inverted_index_ctx = std::make_shared<InvertedIndexCtx>( + get_inverted_index_parser_type_from_string( + get_parser_string_from_properties(_index_meta.properties())), + get_parser_mode_string_from_properties(_index_meta.properties()), + get_parser_char_filter_map_from_properties(_index_meta.properties())); auto analyzer = create_analyzer(inverted_index_ctx.get()); auto lowercase = get_parser_lowercase_from_properties(_index_meta.properties()); if (lowercase == "true") { @@ -263,8 +285,8 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run } else if (lowercase == "false") { analyzer->set_lowercase(false); } - auto reader = create_reader(inverted_index_ctx.get(), search_str); inverted_index_ctx->analyzer = analyzer.get(); + auto reader = create_reader(inverted_index_ctx.get(), search_str); get_analyse_result(analyse_result, reader.get(), analyzer.get(), column_name, query_type); } @@ -273,11 +295,7 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run "token parser result is empty for query, " "please check your query: '{}' and index parser: '{}'", search_str, get_parser_string_from_properties(_index_meta.properties())); - if (query_type == InvertedIndexQueryType::MATCH_ALL_QUERY || - query_type == InvertedIndexQueryType::MATCH_ANY_QUERY || - query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY || - query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY || - query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) { + if (is_match_query(query_type)) { LOG(WARNING) << msg; return Status::OK(); } else { @@ -289,156 +307,42 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run std::wstring field_ws = std::wstring(column_name.begin(), column_name.end()); roaring::Roaring query_match_bitmap; - bool null_bitmap_already_read = false; if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY || query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY || query_type == InvertedIndexQueryType::MATCH_ALL_QUERY || - query_type == InvertedIndexQueryType::EQUAL_QUERY) { - std::string str_tokens; - for (auto& token : analyse_result) { - str_tokens += token; - str_tokens += " "; - } - - auto* cache = InvertedIndexQueryCache::instance(); - InvertedIndexQueryCache::CacheKey cache_key; - cache_key.index_path = index_file_path; - cache_key.column_name = column_name; - cache_key.query_type = query_type; - //auto str_tokens = lucene_wcstoutf8string(wstr_tokens.c_str(), wstr_tokens.length()); - cache_key.value.swap(str_tokens); - InvertedIndexQueryCacheHandle cache_handle; - std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr; - if (cache->lookup(cache_key, &cache_handle)) { - stats->inverted_index_query_cache_hit++; - term_match_bitmap = cache_handle.get_bitmap(); - } else { - stats->inverted_index_query_cache_miss++; - InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(InvertedIndexSearcherCache::instance()->get_index_searcher( - _fs, _index_dir.c_str(), _index_file_name, &inverted_index_cache_handle, - stats, type(), _has_null)); - auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); - if (FulltextIndexSearcherPtr* searcher_ptr = - std::get_if<FulltextIndexSearcherPtr>(&searcher_variant)) { - term_match_bitmap = std::make_shared<roaring::Roaring>(); - - Status res = - match_index_search(stats, runtime_state, query_type, field_ws, - analyse_result, *searcher_ptr, term_match_bitmap); - if (!res) { - return res; - } + query_type == InvertedIndexQueryType::EQUAL_QUERY || + query_type == InvertedIndexQueryType::MATCH_ANY_QUERY) { + std::string str_tokens = join(analyse_result, " "); - // add to cache - term_match_bitmap->runOptimize(); - cache->insert(cache_key, term_match_bitmap, &cache_handle); - } - } - query_match_bitmap = *term_match_bitmap; - } else if (query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) { - std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr; - auto* cache = InvertedIndexQueryCache::instance(); - - InvertedIndexQueryCache::CacheKey cache_key; - cache_key.index_path = index_file_path; - cache_key.column_name = column_name; - cache_key.query_type = query_type; - cache_key.value = analyse_result[0]; - InvertedIndexQueryCacheHandle cache_handle; - if (cache->lookup(cache_key, &cache_handle)) { - stats->inverted_index_query_cache_hit++; - term_match_bitmap = cache_handle.get_bitmap(); - } else { - stats->inverted_index_query_cache_miss++; - InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(InvertedIndexSearcherCache::instance()->get_index_searcher( - _fs, _index_dir.c_str(), _index_file_name, &inverted_index_cache_handle, - stats, type(), _has_null)); - auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); - if (FulltextIndexSearcherPtr* searcher_ptr = - std::get_if<FulltextIndexSearcherPtr>(&searcher_variant)) { - term_match_bitmap = std::make_shared<roaring::Roaring>(); - - Status res = - match_index_search(stats, runtime_state, query_type, field_ws, - analyse_result, *searcher_ptr, term_match_bitmap); - if (!res.ok()) { - return res; - } - } - term_match_bitmap->runOptimize(); - cache->insert(cache_key, term_match_bitmap, &cache_handle); - } - query_match_bitmap = *term_match_bitmap; - } else { - bool first = true; - for (auto token : analyse_result) { - std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr; - - // try to get term bitmap match result from cache to avoid query index on cache hit - auto* cache = InvertedIndexQueryCache::instance(); - // use EQUAL_QUERY type here since cache is for each term/token - //auto token = lucene_wcstoutf8string(token_ws.c_str(), token_ws.length()); - std::wstring token_ws = StringUtil::string_to_wstring(token); - - InvertedIndexQueryCache::CacheKey cache_key { - index_file_path, column_name, InvertedIndexQueryType::EQUAL_QUERY, token}; - VLOG_DEBUG << "cache_key:" << cache_key.encode(); - InvertedIndexQueryCacheHandle cache_handle; - if (cache->lookup(cache_key, &cache_handle)) { - stats->inverted_index_query_cache_hit++; - term_match_bitmap = cache_handle.get_bitmap(); - } else { - stats->inverted_index_query_cache_miss++; - InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(InvertedIndexSearcherCache::instance()->get_index_searcher( - _fs, _index_dir.c_str(), _index_file_name, &inverted_index_cache_handle, - stats, type(), _has_null)); - auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); - if (FulltextIndexSearcherPtr* searcher_ptr = - std::get_if<FulltextIndexSearcherPtr>(&searcher_variant)) { - term_match_bitmap = std::make_shared<roaring::Roaring>(); - // unique_ptr with custom deleter - std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> term { - _CLNEW lucene::index::Term(field_ws.c_str(), token_ws.c_str()), - [](lucene::index::Term* term) { _CLDECDELETE(term); }}; - query.reset(new lucene::search::TermQuery(term.get())); - - Status res = normal_index_search(stats, query_type, *searcher_ptr, - null_bitmap_already_read, query, - term_match_bitmap); - if (!res.ok()) { - return res; - } - - // add to cache - term_match_bitmap->runOptimize(); - cache->insert(cache_key, term_match_bitmap, &cache_handle); - } - } - - // add to query_match_bitmap - if (first) { - SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer); - query_match_bitmap = *term_match_bitmap; - first = false; - continue; - } + cache_key = {_index_dir / _index_file_name, column_name, query_type, str_tokens}; + } + auto* cache = InvertedIndexQueryCache::instance(); + InvertedIndexQueryCacheHandle cache_handler; - switch (query_type) { - case InvertedIndexQueryType::MATCH_ANY_QUERY: { - SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_op_timer); - query_match_bitmap |= *term_match_bitmap; - break; - } - default: { - return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( - "fulltext query do not support query type other than match."); - } - } + std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr; + auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map); + if (cache_status.ok()) { + return Status::OK(); + } + stats->inverted_index_query_cache_miss++; + FulltextIndexSearcherPtr* searcher_ptr = nullptr; + + InvertedIndexCacheHandle inverted_index_cache_handle; + RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, stats)); + auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); + searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant); + if (searcher_ptr != nullptr) { + term_match_bitmap = std::make_shared<roaring::Roaring>(); + + Status res = match_index_search(stats, runtime_state, query_type, field_ws, + analyse_result, *searcher_ptr, term_match_bitmap); + if (!res.ok()) { + return res; } + term_match_bitmap->runOptimize(); + cache->insert(cache_key, term_match_bitmap, &cache_handler); } + query_match_bitmap = *term_match_bitmap; bit_map->swap(query_match_bitmap); return Status::OK(); @@ -448,41 +352,6 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run } } -Status FullTextIndexReader::normal_index_search( - OlapReaderStatistics* stats, InvertedIndexQueryType query_type, - const FulltextIndexSearcherPtr& index_searcher, bool& null_bitmap_already_read, - const std::unique_ptr<lucene::search::Query>& query, - const std::shared_ptr<roaring::Roaring>& term_match_bitmap) { - check_null_bitmap(index_searcher, null_bitmap_already_read); - - try { - SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); - if (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY || - query_type == InvertedIndexQueryType::EQUAL_QUERY) { - index_searcher->_search(query.get(), [&term_match_bitmap](DocRange* doc_range) { - if (doc_range->type_ == DocRangeType::kMany) { - term_match_bitmap->addMany(doc_range->doc_many_size_, - doc_range->doc_many->data()); - } else { - term_match_bitmap->addRange(doc_range->doc_range.first, - doc_range->doc_range.second); - } - }); - } else { - index_searcher->_search(query.get(), [&term_match_bitmap](const int32_t docid, - const float_t /*score*/) { - // docid equal to rowid in segment - term_match_bitmap->add(docid); - }); - } - } catch (const CLuceneError& e) { - return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>("CLuceneError occured: {}", - e.what()); - } - - return Status::OK(); -} - Status FullTextIndexReader::match_index_search( OlapReaderStatistics* stats, RuntimeState* runtime_state, InvertedIndexQueryType query_type, const std::wstring& field_ws, const std::vector<std::string>& analyse_result, @@ -505,18 +374,6 @@ Status FullTextIndexReader::match_index_search( return Status::OK(); } -void FullTextIndexReader::check_null_bitmap(const FulltextIndexSearcherPtr& index_searcher, - bool& null_bitmap_already_read) { - // try to reuse index_searcher's directory to read null_bitmap to cache - // to avoid open directory additionally for null_bitmap - if (!null_bitmap_already_read) { - InvertedIndexQueryCacheHandle null_bitmap_cache_handle; - static_cast<void>(read_null_bitmap(&null_bitmap_cache_handle, - index_searcher->getReader()->directory())); - null_bitmap_already_read = true; - } -} - InvertedIndexReaderType FullTextIndexReader::type() { return InvertedIndexReaderType::FULLTEXT; } @@ -535,7 +392,7 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, roaring::Roaring* bit_map) { SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); - const StringRef* search_query = reinterpret_cast<const StringRef*>(query_value); + const auto* search_query = reinterpret_cast<const StringRef*>(query_value); auto act_len = strnlen(search_query->data, search_query->size); std::string search_str(search_query->data, act_len); // std::string search_str = reinterpret_cast<const StringRef*>(query_value)->to_string(); @@ -549,92 +406,90 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, [](lucene::index::Term* term) { _CLDECDELETE(term); }}; std::unique_ptr<lucene::search::Query> query; - io::Path path(_path); - auto index_dir = path.parent_path(); - auto index_file_name = InvertedIndexDescriptor::get_index_file_name( - path.filename(), _index_meta.index_id(), _index_meta.get_index_suffix()); - auto index_file_path = index_dir / index_file_name; + auto index_file_path = _index_dir / _index_file_name; // try to get query bitmap result from cache and return immediately on cache hit InvertedIndexQueryCache::CacheKey cache_key {index_file_path, column_name, query_type, search_str}; - auto cache = InvertedIndexQueryCache::instance(); - InvertedIndexQueryCacheHandle cache_handle; - if (cache->lookup(cache_key, &cache_handle)) { - stats->inverted_index_query_cache_hit++; - SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer); - *bit_map = *cache_handle.get_bitmap(); - return Status::OK(); - } else { - stats->inverted_index_query_cache_miss++; - } + auto* cache = InvertedIndexQueryCache::instance(); + InvertedIndexQueryCacheHandle cache_handler; - switch (query_type) { - case InvertedIndexQueryType::MATCH_ANY_QUERY: - case InvertedIndexQueryType::MATCH_ALL_QUERY: - case InvertedIndexQueryType::MATCH_PHRASE_QUERY: - case InvertedIndexQueryType::EQUAL_QUERY: { - query.reset(new lucene::search::TermQuery(term.get())); - break; - } - case InvertedIndexQueryType::LESS_THAN_QUERY: { - query.reset(new lucene::search::RangeQuery(nullptr, term.get(), false)); - break; - } - case InvertedIndexQueryType::LESS_EQUAL_QUERY: { - query.reset(new lucene::search::RangeQuery(nullptr, term.get(), true)); - break; - } - case InvertedIndexQueryType::GREATER_THAN_QUERY: { - query.reset(new lucene::search::RangeQuery(term.get(), nullptr, false)); - break; - } - case InvertedIndexQueryType::GREATER_EQUAL_QUERY: { - query.reset(new lucene::search::RangeQuery(term.get(), nullptr, true)); - break; - } - default: - return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( - "invalid query type when query untokenized inverted index"); + auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map); + if (cache_status.ok()) { + return Status::OK(); } roaring::Roaring result; + FulltextIndexSearcherPtr* searcher_ptr = nullptr; InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(InvertedIndexSearcherCache::instance()->get_index_searcher( - _fs, _index_dir.c_str(), _index_file_name, &inverted_index_cache_handle, stats, type(), - _has_null)); + RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, stats)); auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); - if (FulltextIndexSearcherPtr* index_searcher = - std::get_if<FulltextIndexSearcherPtr>(&searcher_variant)) { + searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant); + if (searcher_ptr != nullptr) { // try to reuse index_searcher's directory to read null_bitmap to cache // to avoid open directory additionally for null_bitmap InvertedIndexQueryCacheHandle null_bitmap_cache_handle; static_cast<void>(read_null_bitmap(&null_bitmap_cache_handle, - (*index_searcher)->getReader()->directory())); + (*searcher_ptr)->getReader()->directory())); try { - if (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY || - query_type == InvertedIndexQueryType::MATCH_ALL_QUERY || - query_type == InvertedIndexQueryType::EQUAL_QUERY) { + switch (query_type) { + case InvertedIndexQueryType::MATCH_ANY_QUERY: + case InvertedIndexQueryType::MATCH_ALL_QUERY: + case InvertedIndexQueryType::EQUAL_QUERY: { + query = std::make_unique<lucene::search::TermQuery>(term.get()); SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); - (*index_searcher)->_search(query.get(), [&result](DocRange* doc_range) { + (*searcher_ptr)->_search(query.get(), [&result](DocRange* doc_range) { if (doc_range->type_ == DocRangeType::kMany) { result.addMany(doc_range->doc_many_size_, doc_range->doc_many->data()); } else { result.addRange(doc_range->doc_range.first, doc_range->doc_range.second); } }); - } else { + break; + } + case InvertedIndexQueryType::MATCH_PHRASE_QUERY: { + query = std::make_unique<lucene::search::TermQuery>(term.get()); SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); - (*index_searcher) + (*searcher_ptr) ->_search(query.get(), [&result](const int32_t docid, const float_t /*score*/) { // docid equal to rowid in segment result.add(docid); }); + break; + } + + case InvertedIndexQueryType::LESS_THAN_QUERY: + case InvertedIndexQueryType::LESS_EQUAL_QUERY: + case InvertedIndexQueryType::GREATER_THAN_QUERY: + case InvertedIndexQueryType::GREATER_EQUAL_QUERY: { + bool include_upper = query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY; + bool include_lower = query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY; + + if (query_type == InvertedIndexQueryType::LESS_THAN_QUERY || + query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY) { + query = std::make_unique<lucene::search::RangeQuery>(nullptr, term.get(), + include_upper); + } else { // GREATER_THAN_QUERY or GREATER_EQUAL_QUERY + query = std::make_unique<lucene::search::RangeQuery>(term.get(), nullptr, + include_lower); + } + + SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); + (*searcher_ptr) + ->_search(query.get(), + [&result](const int32_t docid, const float_t /*score*/) { + result.add(docid); + }); + break; + } + default: + return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( + "invalid query type when query untokenized inverted index"); } } catch (const CLuceneError& e) { - if (_is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses) { + if (is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses) { return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>( "range query term exceeds limits, try to downgrade from inverted index, " "column " @@ -651,7 +506,7 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, std::shared_ptr<roaring::Roaring> term_match_bitmap = std::make_shared<roaring::Roaring>(result); term_match_bitmap->runOptimize(); - cache->insert(cache_key, term_match_bitmap, &cache_handle); + cache->insert(cache_key, term_match_bitmap, &cache_handler); bit_map->swap(result); } @@ -669,10 +524,9 @@ Status BkdIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* r } template <InvertedIndexQueryType QT> -Status BkdIndexReader::bkd_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, - std::shared_ptr<lucene::util::bkd::bkd_reader> r, - InvertedIndexVisitor<QT>* visitor) { +Status BkdIndexReader::construct_bkd_query_value(const void* query_value, + std::shared_ptr<lucene::util::bkd::bkd_reader> r, + InvertedIndexVisitor<QT>* visitor) { char tmp[r->bytes_per_dim_]; if constexpr (QT == InvertedIndexQueryType::EQUAL_QUERY) { _value_key_coder->full_encode_ascending(query_value, &visitor->query_max); @@ -691,12 +545,10 @@ Status BkdIndexReader::bkd_query(OlapReaderStatistics* stats, const std::string& return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( "invalid query type when query bkd index"); } - visitor->set_reader(r.get()); return Status::OK(); } -Status BkdIndexReader::invoke_bkd_try_query(OlapReaderStatistics* stats, - const std::string& column_name, const void* query_value, +Status BkdIndexReader::invoke_bkd_try_query(const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr<lucene::util::bkd::bkd_reader> r, uint32_t* count) { @@ -704,39 +556,39 @@ Status BkdIndexReader::invoke_bkd_try_query(OlapReaderStatistics* stats, case InvertedIndexQueryType::LESS_THAN_QUERY: { auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::LESS_THAN_QUERY>>( - nullptr, true); - RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, r, visitor.get())); + r.get(), nullptr, true); + RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get())); *count = r->estimate_point_count(visitor.get()); break; } case InvertedIndexQueryType::LESS_EQUAL_QUERY: { auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::LESS_EQUAL_QUERY>>( - nullptr, true); - RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, r, visitor.get())); + r.get(), nullptr, true); + RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get())); *count = r->estimate_point_count(visitor.get()); break; } case InvertedIndexQueryType::GREATER_THAN_QUERY: { auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::GREATER_THAN_QUERY>>( - nullptr, true); - RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, r, visitor.get())); + r.get(), nullptr, true); + RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get())); *count = r->estimate_point_count(visitor.get()); break; } case InvertedIndexQueryType::GREATER_EQUAL_QUERY: { auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::GREATER_EQUAL_QUERY>>( - nullptr, true); - RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, r, visitor.get())); + r.get(), nullptr, true); + RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get())); *count = r->estimate_point_count(visitor.get()); break; } case InvertedIndexQueryType::EQUAL_QUERY: { auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::EQUAL_QUERY>>( - nullptr, true); - RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, r, visitor.get())); + r.get(), nullptr, true); + RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get())); *count = r->estimate_point_count(visitor.get()); break; } @@ -746,47 +598,46 @@ Status BkdIndexReader::invoke_bkd_try_query(OlapReaderStatistics* stats, return Status::OK(); } -Status BkdIndexReader::invoke_bkd_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, +Status BkdIndexReader::invoke_bkd_query(const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr<lucene::util::bkd::bkd_reader> r, roaring::Roaring* bit_map) { switch (query_type) { case InvertedIndexQueryType::LESS_THAN_QUERY: { auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::LESS_THAN_QUERY>>( - bit_map); - RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, r, visitor.get())); + r.get(), bit_map); + RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get())); r->intersect(visitor.get()); break; } case InvertedIndexQueryType::LESS_EQUAL_QUERY: { auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::LESS_EQUAL_QUERY>>( - bit_map); - RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, r, visitor.get())); + r.get(), bit_map); + RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get())); r->intersect(visitor.get()); break; } case InvertedIndexQueryType::GREATER_THAN_QUERY: { auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::GREATER_THAN_QUERY>>( - bit_map); - RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, r, visitor.get())); + r.get(), bit_map); + RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get())); r->intersect(visitor.get()); break; } case InvertedIndexQueryType::GREATER_EQUAL_QUERY: { auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::GREATER_EQUAL_QUERY>>( - bit_map); - RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, r, visitor.get())); + r.get(), bit_map); + RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get())); r->intersect(visitor.get()); break; } case InvertedIndexQueryType::EQUAL_QUERY: { auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::EQUAL_QUERY>>( - bit_map); - RETURN_IF_ERROR(bkd_query(stats, column_name, query_value, r, visitor.get())); + r.get(), bit_map); + RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get())); r->intersect(visitor.get()); break; } @@ -815,13 +666,13 @@ Status BkdIndexReader::try_query(OlapReaderStatistics* stats, const std::string& auto* cache = InvertedIndexQueryCache::instance(); InvertedIndexQueryCacheHandle cache_handler; roaring::Roaring bit_map; - auto cache_status = handle_cache(cache, cache_key, &cache_handler, stats, &bit_map); + auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, &bit_map); if (cache_status.ok()) { *count = bit_map.cardinality(); return Status::OK(); } - return invoke_bkd_try_query(stats, column_name, query_value, query_type, r, count); + return invoke_bkd_try_query(query_value, query_type, r, count); } catch (const CLuceneError& e) { return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( "BKD Query CLuceneError Occurred, error msg: {}", e.what()); @@ -831,21 +682,6 @@ Status BkdIndexReader::try_query(OlapReaderStatistics* stats, const std::string& return Status::OK(); } -Status BkdIndexReader::handle_cache(InvertedIndexQueryCache* cache, - const InvertedIndexQueryCache::CacheKey& cache_key, - InvertedIndexQueryCacheHandle* cache_handler, - OlapReaderStatistics* stats, roaring::Roaring* bit_map) { - if (cache->lookup(cache_key, cache_handler)) { - stats->inverted_index_query_cache_hit++; - SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer); - *bit_map = *cache_handler->get_bitmap(); - return Status::OK(); - } else { - stats->inverted_index_query_cache_miss++; - return Status::Error<ErrorCode::KEY_NOT_FOUND>("cache miss"); - } -} - Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, roaring::Roaring* bit_map) { @@ -864,14 +700,14 @@ Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_ InvertedIndexQueryCache::CacheKey cache_key {_index_dir / _index_file_name, column_name, query_type, query_str}; - auto cache = InvertedIndexQueryCache::instance(); + auto* cache = InvertedIndexQueryCache::instance(); InvertedIndexQueryCacheHandle cache_handler; - auto cache_status = handle_cache(cache, cache_key, &cache_handler, stats, bit_map); + auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map); if (cache_status.ok()) { return Status::OK(); } - RETURN_IF_ERROR(invoke_bkd_query(stats, column_name, query_value, query_type, r, bit_map)); + RETURN_IF_ERROR(invoke_bkd_query(query_value, query_type, r, bit_map)); std::shared_ptr<roaring::Roaring> query_bitmap = std::make_shared<roaring::Roaring>(*bit_map); query_bitmap->runOptimize(); @@ -889,12 +725,11 @@ Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_ Status BkdIndexReader::get_bkd_reader(BKDIndexSearcherPtr& bkd_reader, OlapReaderStatistics* stats) { + BKDIndexSearcherPtr* bkd_searcher = nullptr; InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(InvertedIndexSearcherCache::instance()->get_index_searcher( - _fs, _index_dir.c_str(), _index_file_name, &inverted_index_cache_handle, stats, type(), - _has_null)); + RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, stats)); auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); - auto* bkd_searcher = std::get_if<BKDIndexSearcherPtr>(&searcher_variant); + bkd_searcher = std::get_if<BKDIndexSearcherPtr>(&searcher_variant); if (bkd_searcher) { _type_info = get_scalar_type_info((FieldType)(*bkd_searcher)->type); if (_type_info == nullptr) { @@ -917,11 +752,15 @@ InvertedIndexReaderType BkdIndexReader::type() { } template <InvertedIndexQueryType QT> -InvertedIndexVisitor<QT>::InvertedIndexVisitor(roaring::Roaring* h, bool only_count) - : _hits(h), _num_hits(0), _only_count(only_count) {} +InvertedIndexVisitor<QT>::InvertedIndexVisitor(lucene::util::bkd::bkd_reader* r, + roaring::Roaring* h, bool only_count) + : _hits(h), _num_hits(0), _only_count(only_count), _reader(r) {} template <InvertedIndexQueryType QT> int InvertedIndexVisitor<QT>::matches(uint8_t* packed_value) { + if (UNLIKELY(_reader == nullptr)) { + throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); + } bool all_greater_than_max = true; bool all_within_range = true; @@ -955,6 +794,9 @@ int InvertedIndexVisitor<QT>::matches(uint8_t* packed_value) { template <> int InvertedIndexVisitor<InvertedIndexQueryType::EQUAL_QUERY>::matches(uint8_t* packed_value) { + if (UNLIKELY(_reader == nullptr)) { + throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); + } // if query type is equal, query_min == query_max if (_reader->num_data_dims_ == 1) { return std::memcmp(packed_value, (const uint8_t*)query_min.c_str(), @@ -979,6 +821,9 @@ int InvertedIndexVisitor<InvertedIndexQueryType::EQUAL_QUERY>::matches(uint8_t* template <> int InvertedIndexVisitor<InvertedIndexQueryType::LESS_THAN_QUERY>::matches(uint8_t* packed_value) { + if (UNLIKELY(_reader == nullptr)) { + throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); + } if (_reader->num_data_dims_ == 1) { auto result = std::memcmp(packed_value, (const uint8_t*)query_max.c_str(), _reader->bytes_per_dim_); @@ -1008,6 +853,9 @@ int InvertedIndexVisitor<InvertedIndexQueryType::LESS_THAN_QUERY>::matches(uint8 template <> int InvertedIndexVisitor<InvertedIndexQueryType::LESS_EQUAL_QUERY>::matches(uint8_t* packed_value) { + if (UNLIKELY(_reader == nullptr)) { + throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); + } if (_reader->num_data_dims_ == 1) { auto result = std::memcmp(packed_value, (const uint8_t*)query_max.c_str(), _reader->bytes_per_dim_); @@ -1038,6 +886,9 @@ int InvertedIndexVisitor<InvertedIndexQueryType::LESS_EQUAL_QUERY>::matches(uint template <> int InvertedIndexVisitor<InvertedIndexQueryType::GREATER_THAN_QUERY>::matches( uint8_t* packed_value) { + if (UNLIKELY(_reader == nullptr)) { + throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); + } if (_reader->num_data_dims_ == 1) { auto result = std::memcmp(packed_value, (const uint8_t*)query_min.c_str(), _reader->bytes_per_dim_); @@ -1062,6 +913,9 @@ int InvertedIndexVisitor<InvertedIndexQueryType::GREATER_THAN_QUERY>::matches( template <> int InvertedIndexVisitor<InvertedIndexQueryType::GREATER_EQUAL_QUERY>::matches( uint8_t* packed_value) { + if (UNLIKELY(_reader == nullptr)) { + throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); + } if (_reader->num_data_dims_ == 1) { auto result = std::memcmp(packed_value, (const uint8_t*)query_min.c_str(), _reader->bytes_per_dim_); @@ -1161,6 +1015,9 @@ int InvertedIndexVisitor<QT>::visit(int row_id, std::vector<uint8_t>& packed_val template <> lucene::util::bkd::relation InvertedIndexVisitor<InvertedIndexQueryType::LESS_THAN_QUERY>::compare( std::vector<uint8_t>& min_packed, std::vector<uint8_t>& max_packed) { + if (UNLIKELY(_reader == nullptr)) { + throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); + } bool crosses = false; for (int dim = 0; dim < _reader->num_data_dims_; dim++) { int offset = dim * _reader->bytes_per_dim_; @@ -1190,6 +1047,9 @@ template <> lucene::util::bkd::relation InvertedIndexVisitor<InvertedIndexQueryType::GREATER_THAN_QUERY>::compare( std::vector<uint8_t>& min_packed, std::vector<uint8_t>& max_packed) { + if (UNLIKELY(_reader == nullptr)) { + throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); + } bool crosses = false; for (int dim = 0; dim < _reader->num_data_dims_; dim++) { int offset = dim * _reader->bytes_per_dim_; @@ -1239,6 +1099,9 @@ lucene::util::bkd::relation InvertedIndexVisitor<QT>::compare_prefix(std::vector template <InvertedIndexQueryType QT> lucene::util::bkd::relation InvertedIndexVisitor<QT>::compare(std::vector<uint8_t>& min_packed, std::vector<uint8_t>& max_packed) { + if (UNLIKELY(_reader == nullptr)) { + throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); + } bool crosses = false; for (int dim = 0; dim < _reader->num_data_dims_; dim++) { int offset = dim * _reader->bytes_per_dim_; @@ -1273,8 +1136,12 @@ Status InvertedIndexIterator::read_from_inverted_index(const std::string& column InvertedIndexQueryType query_type, uint32_t segment_num_rows, roaring::Roaring* bit_map, bool skip_try) { + if (UNLIKELY(_reader == nullptr)) { + throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); + } if (!skip_try && _reader->type() == InvertedIndexReaderType::BKD) { - if (_runtime_state->query_options().inverted_index_skip_threshold > 0 && + if (_runtime_state != nullptr && + _runtime_state->query_options().inverted_index_skip_threshold > 0 && _runtime_state->query_options().inverted_index_skip_threshold < 100) { auto query_bkd_limit_percent = _runtime_state->query_options().inverted_index_skip_threshold; @@ -1317,5 +1184,4 @@ const std::map<string, string>& InvertedIndexIterator::get_index_properties() co return _reader->get_index_properties(); } -} // namespace segment_v2 -} // namespace doris +} // namespace doris::segment_v2 diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 430ec917329..b8b40eece57 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -98,7 +98,6 @@ public: lucene::store::Directory* dir = nullptr); virtual InvertedIndexReaderType type() = 0; - bool indexExists(io::Path& index_file_path); [[nodiscard]] uint32_t get_index_id() const { return _index_meta.index_id(); } @@ -119,9 +118,29 @@ public: static std::unique_ptr<lucene::analysis::Analyzer> create_analyzer( InvertedIndexCtx* inverted_index_ctx); + virtual Status handle_query_cache(InvertedIndexQueryCache* cache, + const InvertedIndexQueryCache::CacheKey& cache_key, + InvertedIndexQueryCacheHandle* cache_handler, + OlapReaderStatistics* stats, roaring::Roaring* bit_map) { + if (cache->lookup(cache_key, cache_handler)) { + stats->inverted_index_query_cache_hit++; + SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer); + *bit_map = *cache_handler->get_bitmap(); + return Status::OK(); + } + stats->inverted_index_query_cache_miss++; + return Status::Error<ErrorCode::KEY_NOT_FOUND>("cache miss"); + } + + virtual Status handle_searcher_cache(InvertedIndexCacheHandle* inverted_index_cache_handle, + OlapReaderStatistics* stats); + + static Status create_index_searcher(IndexSearcherPtr* searcher, io::FileSystemSPtr fs, + const io::Path& index_dir, + const std::string& index_file_name, MemTracker* mem_tracker, + InvertedIndexReaderType reader_type); + protected: - bool _is_range_query(InvertedIndexQueryType query_type); - bool _is_match_query(InvertedIndexQueryType query_type); friend class InvertedIndexIterator; io::FileSystemSPtr _fs; const std::string& _path; @@ -155,12 +174,6 @@ public: InvertedIndexReaderType type() override; private: - Status normal_index_search(OlapReaderStatistics* stats, InvertedIndexQueryType query_type, - const FulltextIndexSearcherPtr& index_searcher, - bool& null_bitmap_already_read, - const std::unique_ptr<lucene::search::Query>& query, - const std::shared_ptr<roaring::Roaring>& term_match_bitmap); - Status match_index_search(OlapReaderStatistics* stats, RuntimeState* runtime_state, InvertedIndexQueryType query_type, const std::wstring& field_ws, const std::vector<std::string>& analyse_result, @@ -207,7 +220,8 @@ public: std::string query_max; public: - InvertedIndexVisitor(roaring::Roaring* hits, bool only_count = false); + InvertedIndexVisitor(lucene::util::bkd::bkd_reader* r, roaring::Roaring* hits, + bool only_count = false); ~InvertedIndexVisitor() override = default; void set_reader(lucene::util::bkd::bkd_reader* r) { _reader = r; } @@ -246,22 +260,15 @@ public: Status try_query(OlapReaderStatistics* stats, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, uint32_t* count) override; - Status invoke_bkd_try_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + Status invoke_bkd_try_query(const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr<lucene::util::bkd::bkd_reader> r, uint32_t* count); - Status invoke_bkd_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + Status invoke_bkd_query(const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr<lucene::util::bkd::bkd_reader> r, roaring::Roaring* bit_map); template <InvertedIndexQueryType QT> - Status bkd_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, std::shared_ptr<lucene::util::bkd::bkd_reader> r, - InvertedIndexVisitor<QT>* visitor); - - Status handle_cache(InvertedIndexQueryCache* cache, - const InvertedIndexQueryCache::CacheKey& cache_key, - InvertedIndexQueryCacheHandle* cache_handler, OlapReaderStatistics* stats, - roaring::Roaring* bit_map); + Status construct_bkd_query_value(const void* query_value, + std::shared_ptr<lucene::util::bkd::bkd_reader> r, + InvertedIndexVisitor<QT>* visitor); InvertedIndexReaderType type() override; Status get_bkd_reader(BKDIndexSearcherPtr& reader, OlapReaderStatistics* stats); diff --git a/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp b/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp new file mode 100644 index 00000000000..491d3e518eb --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index_searcher.cpp @@ -0,0 +1,112 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/inverted_index_searcher.h" + +#include <CLucene/search/IndexSearcher.h> +#include <CLucene/util/bkd/bkd_reader.h> + +#include "common/config.h" +#include "olap/rowset/segment_v2/inverted_index_compound_directory.h" +#include "olap/rowset/segment_v2/inverted_index_compound_reader.h" +#include "olap/rowset/segment_v2/inverted_index_desc.h" + +namespace doris::segment_v2 { +Status FulltextIndexSearcherBuilder::build(DorisCompoundReader* directory, + OptionalIndexSearcherPtr& output_searcher) { + auto close_directory = true; + lucene::index::IndexReader* reader = nullptr; + try { + reader = lucene::index::IndexReader::open( + directory, config::inverted_index_read_buffer_size, close_directory); + } catch (const CLuceneError& e) { + return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( + "FulltextIndexSearcherBuilder build error: {}", e.what()); + } + bool close_reader = true; + auto index_searcher = std::make_shared<lucene::search::IndexSearcher>(reader, close_reader); + if (!index_searcher) { + output_searcher = std::nullopt; + return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( + "FulltextIndexSearcherBuilder build index_searcher error."); + } + // NOTE: need to cl_refcount-- here, so that directory will be deleted when + // index_searcher is destroyed + _CLDECDELETE(directory) + output_searcher = index_searcher; + return Status::OK(); +} + +Status BKDIndexSearcherBuilder::build(DorisCompoundReader* directory, + OptionalIndexSearcherPtr& output_searcher) { + try { + auto close_directory = true; + auto bkd_reader = + std::make_shared<lucene::util::bkd::bkd_reader>(directory, close_directory); + if (!bkd_reader->open()) { + LOG(INFO) << "bkd index file " << directory->getPath() + "/" + directory->getFileName() + << " is empty"; + } + output_searcher = bkd_reader; + _CLDECDELETE(directory) + return Status::OK(); + } catch (const CLuceneError& e) { + return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( + "BKDIndexSearcherBuilder build error: {}", e.what()); + } +} + +Result<std::unique_ptr<IndexSearcherBuilder>> IndexSearcherBuilder::create_index_searcher_builder( + InvertedIndexReaderType reader_type) { + std::unique_ptr<IndexSearcherBuilder> index_builder; + switch (reader_type) { + case InvertedIndexReaderType::STRING_TYPE: + case InvertedIndexReaderType::FULLTEXT: { + index_builder = std::make_unique<FulltextIndexSearcherBuilder>(); + break; + } + case InvertedIndexReaderType::BKD: { + index_builder = std::make_unique<BKDIndexSearcherBuilder>(); + break; + } + + default: + LOG(ERROR) << "InvertedIndexReaderType:" << reader_type_to_string(reader_type) + << " is not support for InvertedIndexSearcherCache"; + return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>( + "InvertedIndexSearcherCache do not support reader type.")); + } + + return index_builder; +} + +Result<IndexSearcherPtr> IndexSearcherBuilder::get_index_searcher(DorisCompoundReader* directory) { + OptionalIndexSearcherPtr result; + auto st = build(directory, result); + if (!st.ok()) { + _CLDECDELETE(directory) + return ResultError(st); + } + if (!result.has_value()) { + _CLDECDELETE(directory) + return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( + "InvertedIndexSearcherCache build error.")); + } + directory->getDorisIndexInput()->setIdxFileCache(false); + return *result; +} +} // namespace doris::segment_v2 \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index_searcher.h b/be/src/olap/rowset/segment_v2/inverted_index_searcher.h new file mode 100644 index 00000000000..ac32bbda7d0 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index_searcher.h @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <CLucene.h> + +#include <memory> +#include <optional> +#include <variant> + +#include "common/status.h" +#include "inverted_index_query_type.h" + +namespace lucene { +namespace search { +class IndexSearcher; +} // namespace search + +namespace util::bkd { +class bkd_reader; +} + +} // namespace lucene + +namespace doris::segment_v2 { +using FulltextIndexSearcherPtr = std::shared_ptr<lucene::search::IndexSearcher>; +using BKDIndexSearcherPtr = std::shared_ptr<lucene::util::bkd::bkd_reader>; +using IndexSearcherPtr = std::variant<FulltextIndexSearcherPtr, BKDIndexSearcherPtr>; +using OptionalIndexSearcherPtr = std::optional<IndexSearcherPtr>; + +class InvertedIndexCacheHandle; +class DorisCompoundReader; + +class IndexSearcherBuilder { +public: + virtual Status build(DorisCompoundReader* directory, + OptionalIndexSearcherPtr& output_searcher) = 0; + virtual ~IndexSearcherBuilder() = default; + virtual Result<IndexSearcherPtr> get_index_searcher(DorisCompoundReader* directory); + static Result<std::unique_ptr<IndexSearcherBuilder>> create_index_searcher_builder( + InvertedIndexReaderType reader_type); +}; + +class FulltextIndexSearcherBuilder : public IndexSearcherBuilder { +public: + Status build(DorisCompoundReader* directory, + OptionalIndexSearcherPtr& output_searcher) override; +}; + +class BKDIndexSearcherBuilder : public IndexSearcherBuilder { +public: + Status build(DorisCompoundReader* directory, + OptionalIndexSearcherPtr& output_searcher) override; +}; +} // namespace doris::segment_v2 \ No newline at end of file diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index 6e811971dab..477d52d471f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -46,6 +46,7 @@ #include "olap/rowset/segment_v2/inverted_index_cache.h" #include "olap/rowset/segment_v2/inverted_index_compound_directory.h" #include "olap/rowset/segment_v2/inverted_index_desc.h" +#include "olap/rowset/segment_v2/inverted_index_reader.h" #include "olap/tablet_schema.h" #include "olap/types.h" #include "runtime/collection_value.h" @@ -108,14 +109,25 @@ public: _index_writer->close(); if (config::enable_write_index_searcher_cache) { // open index searcher into cache + auto mem_tracker = + std::make_unique<MemTracker>("InvertedIndexSearcherCacheWithRead"); + io::Path index_dir(_directory); auto index_file_name = InvertedIndexDescriptor::get_index_file_name( _segment_file_name, _index_meta->index_id(), _index_meta->get_index_suffix()); - auto st = InvertedIndexSearcherCache::instance()->insert( - _fs, _directory, index_file_name, InvertedIndexReaderType::FULLTEXT); - if (!st.ok()) { + IndexSearcherPtr searcher; + auto st = InvertedIndexReader::create_index_searcher( + &searcher, _fs, index_dir, index_file_name, mem_tracker.get(), + InvertedIndexReaderType::FULLTEXT); + if (UNLIKELY(!st.ok())) { LOG(ERROR) << "insert inverted index searcher cache error:" << st; + return; } + auto* cache_value = new InvertedIndexSearcherCache::CacheValue( + std::move(searcher), mem_tracker->consumption(), UnixMillis()); + InvertedIndexSearcherCache::CacheKey searcher_cache_key( + (index_dir / index_file_name).native()); + InvertedIndexSearcherCache::instance()->insert(searcher_cache_key, cache_value); } } } diff --git a/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp index 64b6edec823..b11aa1fc78a 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index_searcher_cache_test.cpp @@ -31,6 +31,8 @@ #include "olap/lru_cache.h" #include "olap/olap_common.h" #include "olap/rowset/segment_v2/inverted_index_cache.h" +#include "olap/rowset/segment_v2/inverted_index_desc.h" +#include "olap/rowset/segment_v2/inverted_index_reader.h" #include "util/time.h" namespace doris { @@ -62,93 +64,68 @@ TEST_F(InvertedIndexSearcherCacheTest, insert_lookup) { new InvertedIndexSearcherCache(kCacheSize, 1); std::string file_name_1 = "test_1.idx"; std::string file_name_2 = "test_2.idx"; + { + auto* cache_value_1 = new InvertedIndexSearcherCache::CacheValue( + FulltextIndexSearcherPtr(nullptr), 0, UnixMillis()); + InvertedIndexSearcherCache::CacheKey searcher_cache_key1(kTestDir + "/" + file_name_1); + index_searcher_cache->insert(searcher_cache_key1, cache_value_1); + } - //insert searcher - Status status = Status::OK(); - status = index_searcher_cache->insert(fs, kTestDir, file_name_1, - InvertedIndexReaderType::FULLTEXT); - EXPECT_EQ(Status::OK(), status); - - status = index_searcher_cache->insert(fs, kTestDir, file_name_2, - InvertedIndexReaderType::FULLTEXT); - EXPECT_EQ(Status::OK(), status); - - OlapReaderStatistics stats; - bool has_null = true; + { + auto* cache_value_2 = new InvertedIndexSearcherCache::CacheValue( + FulltextIndexSearcherPtr(nullptr), 0, UnixMillis()); + InvertedIndexSearcherCache::CacheKey searcher_cache_key2(kTestDir + "/" + file_name_2); + index_searcher_cache->insert(searcher_cache_key2, cache_value_2); + } // lookup after insert { // case 1: lookup exist entry InvertedIndexCacheHandle inverted_index_cache_handle; - status = index_searcher_cache->get_index_searcher( - fs, kTestDir, file_name_1, &inverted_index_cache_handle, &stats, - InvertedIndexReaderType::FULLTEXT, has_null); - EXPECT_EQ(Status::OK(), status); - - auto cache_value_1 = - (InvertedIndexSearcherCache::CacheValue*)(inverted_index_cache_handle._cache) - ->value(inverted_index_cache_handle._handle); - EXPECT_GE(UnixMillis(), cache_value_1->last_visit_time); + auto index_file_path = kTestDir + "/" + file_name_1; + auto searcher_cache_key1 = index_file_path; + auto status = + index_searcher_cache->lookup(searcher_cache_key1, &inverted_index_cache_handle); + EXPECT_TRUE(status); - status = index_searcher_cache->get_index_searcher( - fs, kTestDir, file_name_2, &inverted_index_cache_handle, &stats, - InvertedIndexReaderType::FULLTEXT, has_null); - EXPECT_EQ(Status::OK(), status); + auto cache_value_1 = inverted_index_cache_handle.get_index_cache_value(); + EXPECT_GE(UnixMillis(), cache_value_1->last_visit_time); + } + { + InvertedIndexCacheHandle inverted_index_cache_handle; + auto index_file_path = kTestDir + "/" + file_name_2; + auto searcher_cache_key2 = index_file_path; + auto status = + index_searcher_cache->lookup(searcher_cache_key2, &inverted_index_cache_handle); + EXPECT_TRUE(status); - auto cache_value_2 = - (InvertedIndexSearcherCache::CacheValue*)(inverted_index_cache_handle._cache) - ->value(inverted_index_cache_handle._handle); + auto cache_value_2 = inverted_index_cache_handle.get_index_cache_value(); EXPECT_GE(UnixMillis(), cache_value_2->last_visit_time); } { // case 2: lookup not exist entry - std::string file_name_not_exist_1 = "test_3.idx"; - std::string file_name_not_exist_2 = "test_4.idx"; // use cache { + std::string file_name_not_exist_1 = "test_3.idx"; InvertedIndexCacheHandle inverted_index_cache_handle_1; - status = index_searcher_cache->get_index_searcher( - fs, kTestDir, file_name_not_exist_1, &inverted_index_cache_handle_1, &stats, - InvertedIndexReaderType::FULLTEXT, has_null); - EXPECT_EQ(Status::OK(), status); - EXPECT_FALSE(inverted_index_cache_handle_1.owned); + auto index_file_path = kTestDir + "/" + file_name_not_exist_1; + auto searcher_cache_key = index_file_path; + auto status = index_searcher_cache->lookup(searcher_cache_key, + &inverted_index_cache_handle_1); + EXPECT_FALSE(status); } // lookup again { + std::string file_name_not_exist_1 = "test_4.idx"; InvertedIndexCacheHandle inverted_index_cache_handle_1; - status = index_searcher_cache->get_index_searcher( - fs, kTestDir, file_name_not_exist_1, &inverted_index_cache_handle_1, &stats, - InvertedIndexReaderType::FULLTEXT, has_null); - EXPECT_EQ(Status::OK(), status); - EXPECT_FALSE(inverted_index_cache_handle_1.owned); - - auto cache_value_use_cache = - (InvertedIndexSearcherCache::CacheValue*)(inverted_index_cache_handle_1._cache) - ->value(inverted_index_cache_handle_1._handle); - EXPECT_LT(UnixMillis(), cache_value_use_cache->last_visit_time); + auto index_file_path = kTestDir + "/" + file_name_not_exist_1; + auto searcher_cache_key = index_file_path; + auto status = index_searcher_cache->lookup(searcher_cache_key, + &inverted_index_cache_handle_1); + EXPECT_FALSE(status); } - - // not use cache - InvertedIndexCacheHandle inverted_index_cache_handle_2; - status = index_searcher_cache->get_index_searcher( - fs, kTestDir, file_name_not_exist_2, &inverted_index_cache_handle_2, &stats, - InvertedIndexReaderType::FULLTEXT, has_null, false); - EXPECT_EQ(Status::OK(), status); - EXPECT_TRUE(inverted_index_cache_handle_2.owned); - EXPECT_EQ(nullptr, inverted_index_cache_handle_2._cache); - EXPECT_EQ(nullptr, inverted_index_cache_handle_2._handle); - - status = index_searcher_cache->get_index_searcher( - fs, kTestDir, file_name_not_exist_2, &inverted_index_cache_handle_2, &stats, - InvertedIndexReaderType::FULLTEXT, has_null); - EXPECT_EQ(Status::OK(), status); - EXPECT_FALSE(inverted_index_cache_handle_2.owned); - auto cache_value_use_cache_2 = - (InvertedIndexSearcherCache::CacheValue*)(inverted_index_cache_handle_2._cache) - ->value(inverted_index_cache_handle_2._handle); - EXPECT_EQ(0, cache_value_use_cache_2->last_visit_time); } delete index_searcher_cache; @@ -160,58 +137,50 @@ TEST_F(InvertedIndexSearcherCacheTest, evict_by_usage) { // no need evict std::string file_name_1 = "test_1.idx"; InvertedIndexSearcherCache::CacheKey key_1(file_name_1); - IndexCacheValuePtr cache_value_1 = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - cache_value_1->size = 200; - //cache_value_1->index_searcher; - cache_value_1->last_visit_time = 10; - index_searcher_cache->release(index_searcher_cache->_insert(key_1, cache_value_1.release())); + auto* cache_value_1 = + new InvertedIndexSearcherCache::CacheValue(FulltextIndexSearcherPtr(nullptr), 200, 10); + index_searcher_cache->insert(key_1, cache_value_1); // should evict {key_1, cache_value_1} std::string file_name_2 = "test_2.idx"; InvertedIndexSearcherCache::CacheKey key_2(file_name_2); - IndexCacheValuePtr cache_value_2 = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - cache_value_2->size = 800; - //cache_value_2->index_searcher; - cache_value_2->last_visit_time = 20; - index_searcher_cache->release(index_searcher_cache->_insert(key_2, cache_value_2.release())); + auto* cache_value_2 = + new InvertedIndexSearcherCache::CacheValue(FulltextIndexSearcherPtr(nullptr), 800, 20); + index_searcher_cache->insert(key_2, cache_value_2); { InvertedIndexCacheHandle cache_handle; // lookup key_1 - EXPECT_FALSE(index_searcher_cache->_lookup(key_1, &cache_handle)); + EXPECT_FALSE(index_searcher_cache->lookup(key_1, &cache_handle)); // lookup key_2 - EXPECT_TRUE(index_searcher_cache->_lookup(key_2, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_2, &cache_handle)); } // should evict {key_2, cache_value_2} std::string file_name_3 = "test_3.idx"; InvertedIndexSearcherCache::CacheKey key_3(file_name_3); - IndexCacheValuePtr cache_value_3 = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - cache_value_3->size = 400; - //cache_value_3->index_searcher; - cache_value_3->last_visit_time = 30; - index_searcher_cache->release(index_searcher_cache->_insert(key_3, cache_value_3.release())); + auto* cache_value_3 = + new InvertedIndexSearcherCache::CacheValue(FulltextIndexSearcherPtr(nullptr), 400, 30); + index_searcher_cache->insert(key_3, cache_value_3); { InvertedIndexCacheHandle cache_handle; // lookup key_2 - EXPECT_FALSE(index_searcher_cache->_lookup(key_2, &cache_handle)); + EXPECT_FALSE(index_searcher_cache->lookup(key_2, &cache_handle)); // lookup key_3 - EXPECT_TRUE(index_searcher_cache->_lookup(key_3, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_3, &cache_handle)); } // no need evict std::string file_name_4 = "test_4.idx"; InvertedIndexSearcherCache::CacheKey key_4(file_name_4); - IndexCacheValuePtr cache_value_4 = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - cache_value_4->size = 100; - //cache_value_4->index_searcher; - cache_value_4->last_visit_time = 40; - index_searcher_cache->release(index_searcher_cache->_insert(key_4, cache_value_4.release())); + auto* cache_value_4 = + new InvertedIndexSearcherCache::CacheValue(FulltextIndexSearcherPtr(nullptr), 100, 40); + index_searcher_cache->insert(key_4, cache_value_4); { InvertedIndexCacheHandle cache_handle; // lookup key_3 - EXPECT_TRUE(index_searcher_cache->_lookup(key_3, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_3, &cache_handle)); // lookup key_4 - EXPECT_TRUE(index_searcher_cache->_lookup(key_4, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_4, &cache_handle)); } delete index_searcher_cache; @@ -223,84 +192,76 @@ TEST_F(InvertedIndexSearcherCacheTest, evict_by_element_count_limit) { // no need evict std::string file_name_1 = "test_1.idx"; InvertedIndexSearcherCache::CacheKey key_1(file_name_1); - IndexCacheValuePtr cache_value_1 = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - cache_value_1->size = 20; - //cache_value_1->index_searcher; - cache_value_1->last_visit_time = 10; - index_searcher_cache->release(index_searcher_cache->_insert(key_1, cache_value_1.release())); + auto* cache_value_1 = + new InvertedIndexSearcherCache::CacheValue(FulltextIndexSearcherPtr(nullptr), 20, 10); + index_searcher_cache->insert(key_1, cache_value_1); // no need evict std::string file_name_2 = "test_2.idx"; InvertedIndexSearcherCache::CacheKey key_2(file_name_2); - IndexCacheValuePtr cache_value_2 = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - cache_value_2->size = 50; - //cache_value_2->index_searcher; - cache_value_2->last_visit_time = 20; - index_searcher_cache->release(index_searcher_cache->_insert(key_2, cache_value_2.release())); + auto* cache_value_2 = + new InvertedIndexSearcherCache::CacheValue(FulltextIndexSearcherPtr(nullptr), 50, 20); + index_searcher_cache->insert(key_2, cache_value_2); { InvertedIndexCacheHandle cache_handle; // lookup key_1 - EXPECT_TRUE(index_searcher_cache->_lookup(key_1, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_1, &cache_handle)); } std::this_thread::sleep_for(std::chrono::milliseconds(2)); { InvertedIndexCacheHandle cache_handle; // lookup key_2 - EXPECT_TRUE(index_searcher_cache->_lookup(key_2, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_2, &cache_handle)); } // should evict {key_1, cache_value_1} std::string file_name_3 = "test_3.idx"; InvertedIndexSearcherCache::CacheKey key_3(file_name_3); - IndexCacheValuePtr cache_value_3 = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - cache_value_3->size = 80; - //cache_value_3->index_searcher; - cache_value_3->last_visit_time = 30; - index_searcher_cache->release(index_searcher_cache->_insert(key_3, cache_value_3.release())); + auto* cache_value_3 = + new InvertedIndexSearcherCache::CacheValue(FulltextIndexSearcherPtr(nullptr), 80, 30); + index_searcher_cache->insert(key_3, cache_value_3); { InvertedIndexCacheHandle cache_handle; // lookup key_1 - EXPECT_FALSE(index_searcher_cache->_lookup(key_1, &cache_handle)); + EXPECT_FALSE(index_searcher_cache->lookup(key_1, &cache_handle)); } std::this_thread::sleep_for(std::chrono::milliseconds(2)); { InvertedIndexCacheHandle cache_handle; // lookup key_2 - EXPECT_TRUE(index_searcher_cache->_lookup(key_2, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_2, &cache_handle)); } std::this_thread::sleep_for(std::chrono::milliseconds(2)); { InvertedIndexCacheHandle cache_handle; // lookup key_3 - EXPECT_TRUE(index_searcher_cache->_lookup(key_3, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_3, &cache_handle)); } // should evict {key_2, cache_value_2} std::string file_name_4 = "test_4.idx"; InvertedIndexSearcherCache::CacheKey key_4(file_name_4); - IndexCacheValuePtr cache_value_4 = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - cache_value_4->size = 100; - //cache_value_4->index_searcher; - cache_value_4->last_visit_time = 40; - index_searcher_cache->release(index_searcher_cache->_insert(key_4, cache_value_4.release())); + auto* cache_value_4 = + new InvertedIndexSearcherCache::CacheValue(FulltextIndexSearcherPtr(nullptr), 100, 40); + index_searcher_cache->insert(key_4, cache_value_4); { InvertedIndexCacheHandle cache_handle; // lookup key_1 - EXPECT_FALSE(index_searcher_cache->_lookup(key_1, &cache_handle)); + EXPECT_FALSE(index_searcher_cache->lookup(key_1, &cache_handle)); // lookup key_2 - EXPECT_FALSE(index_searcher_cache->_lookup(key_2, &cache_handle)); + EXPECT_FALSE(index_searcher_cache->lookup(key_2, &cache_handle)); } std::this_thread::sleep_for(std::chrono::milliseconds(2)); { InvertedIndexCacheHandle cache_handle; // lookup key_3 - EXPECT_TRUE(index_searcher_cache->_lookup(key_3, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_3, &cache_handle)); } std::this_thread::sleep_for(std::chrono::milliseconds(2)); { InvertedIndexCacheHandle cache_handle; // lookup key_4 - EXPECT_TRUE(index_searcher_cache->_lookup(key_4, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_4, &cache_handle)); } delete index_searcher_cache; @@ -312,11 +273,9 @@ TEST_F(InvertedIndexSearcherCacheTest, remove_element_only_in_table) { // no need evict std::string file_name_1 = "test_1.idx"; InvertedIndexSearcherCache::CacheKey key_1(file_name_1); - IndexCacheValuePtr cache_value_1 = std::make_unique<InvertedIndexSearcherCache::CacheValue>(); - cache_value_1->size = 200; - //cache_value_1->index_searcher; - cache_value_1->last_visit_time = 10; - index_searcher_cache->release(index_searcher_cache->_insert(key_1, cache_value_1.release())); + auto* cache_value_1 = + new InvertedIndexSearcherCache::CacheValue(FulltextIndexSearcherPtr(nullptr), 200, 10); + index_searcher_cache->insert(key_1, cache_value_1); std::string file_name_2 = "test_2.idx"; InvertedIndexSearcherCache::CacheKey key_2(file_name_2); @@ -326,51 +285,46 @@ TEST_F(InvertedIndexSearcherCacheTest, remove_element_only_in_table) { { InvertedIndexCacheHandle cache_handle; // lookup key_1 - EXPECT_TRUE(index_searcher_cache->_lookup(key_1, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_1, &cache_handle)); // insert key_2, and then evict {key_1, cache_value_1} cache_value_2->size = 800; - //cache_value_2->index_searcher; + cache_value_2->index_searcher = FulltextIndexSearcherPtr(nullptr); cache_value_2->last_visit_time = 20; - index_searcher_cache->release( - index_searcher_cache->_insert(key_2, cache_value_2.release())); + index_searcher_cache->insert(key_2, cache_value_2.release()); // lookup key_2, key_2 has removed from table due to cache is full { - InvertedIndexCacheHandle cache_handle; - EXPECT_FALSE(index_searcher_cache->_lookup(key_2, &cache_handle)); + InvertedIndexCacheHandle cache_handle_2; + EXPECT_FALSE(index_searcher_cache->lookup(key_2, &cache_handle_2)); } } // lookup key_1 exist { InvertedIndexCacheHandle cache_handle; - EXPECT_TRUE(index_searcher_cache->_lookup(key_1, &cache_handle)); + EXPECT_TRUE(index_searcher_cache->lookup(key_1, &cache_handle)); } // lookup key_2 not exist, then insert into cache, and evict key_1 - OlapReaderStatistics stats; - bool has_null = false; { InvertedIndexCacheHandle inverted_index_cache_handle; - auto status = index_searcher_cache->get_index_searcher( - fs, kTestDir, file_name_2, &inverted_index_cache_handle, &stats, - InvertedIndexReaderType::FULLTEXT, has_null); - EXPECT_EQ(Status::OK(), status); - EXPECT_FALSE(inverted_index_cache_handle.owned); + cache_value_2.reset(new InvertedIndexSearcherCache::CacheValue( + FulltextIndexSearcherPtr(nullptr), 0, UnixMillis())); + InvertedIndexSearcherCache::CacheKey searcher_cache_key2(kTestDir + "/" + file_name_2); + index_searcher_cache->insert(searcher_cache_key2, cache_value_2.release()); } // lookup key_2 again { InvertedIndexCacheHandle inverted_index_cache_handle; - auto status = index_searcher_cache->get_index_searcher( - fs, kTestDir, file_name_2, &inverted_index_cache_handle, &stats, - InvertedIndexReaderType::FULLTEXT, has_null); - EXPECT_EQ(Status::OK(), status); - EXPECT_FALSE(inverted_index_cache_handle.owned); - auto cache_value_use_cache = - (InvertedIndexSearcherCache::CacheValue*)(inverted_index_cache_handle._cache) - ->value(inverted_index_cache_handle._handle); - EXPECT_LT(UnixMillis(), cache_value_use_cache->last_visit_time); + auto index_file_path = kTestDir + "/" + file_name_2; + auto searcher_cache_key2 = index_file_path; + auto status = + index_searcher_cache->lookup(searcher_cache_key2, &inverted_index_cache_handle); + EXPECT_TRUE(status); + + auto cache_value_use_cache = inverted_index_cache_handle.get_index_cache_value(); + EXPECT_LE(UnixMillis(), cache_value_use_cache->last_visit_time); } delete index_searcher_cache; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org