xiaokang commented on code in PR #24511: URL: https://github.com/apache/doris/pull/24511#discussion_r1364765916
########## be/src/olap/reader.h: ########## @@ -122,6 +122,7 @@ class TabletReader { std::vector<std::pair<string, std::shared_ptr<BloomFilterFuncBase>>> bloom_filters; std::vector<std::pair<string, std::shared_ptr<BitmapFilterFuncBase>>> bitmap_filters; std::vector<std::pair<string, std::shared_ptr<HybridSetBase>>> in_filters; + Review Comment: unnecessary blank line ########## be/src/olap/rowset/segment_v2/inverted_index_reader.h: ########## @@ -72,18 +73,39 @@ class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexRea public: explicit InvertedIndexReader(io::FileSystemSPtr fs, const std::string& path, const TabletIndex* index_meta) - : _fs(fs), _path(path), _index_meta(*index_meta) {} + : _fs(std::move(fs)), _path(path), _index_meta(*index_meta) { + io::Path io_path(_path); + auto index_dir = io_path.parent_path(); + auto index_file_name = InvertedIndexDescriptor::get_index_file_name(io_path.filename(), + index_meta->index_id()); + auto index_file_path = index_dir / index_file_name; + _file_full_path = index_file_path; + _file_name = index_file_name; + _file_dir = index_dir.c_str(); + } + virtual Status handle_cache(InvertedIndexQueryCache* cache, + const InvertedIndexQueryCache::CacheKey& cache_key, + InvertedIndexQueryCacheHandle* cache_handler, + OlapReaderStatistics* stats, roaring::Roaring* bit_map) { + if (cache->lookup(cache_key, cache_handler)) { + stats->inverted_index_query_cache_hit++; + SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer); + *bit_map = *cache_handler->get_bitmap(); + return Status::OK(); + } + stats->inverted_index_query_cache_miss++; + return Status::Error<ErrorCode::KEY_NOT_FOUND>("cache miss"); Review Comment: may cause printing stacktrace and low performance. ########## be/src/olap/rowset/segment_v2/inverted_index/query/inverted_index_query.h: ########## @@ -0,0 +1,220 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <CLucene/util/FutureArrays.h> +#include <CLucene/util/bkd/bkd_reader.h> + +#include <memory> +#include <string> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/status.h" +#include "io/fs/file_system.h" +#include "io/fs/path.h" +#include "olap/inverted_index_parser.h" +#include "olap/rowset/segment_v2/inverted_index_cache.h" +#include "olap/rowset/segment_v2/inverted_index_compound_reader.h" +#include "olap/rowset/segment_v2/inverted_index_desc.h" +#include "olap/rowset/segment_v2/inverted_index_query_type.h" +#include "olap/tablet_schema.h" +#include "runtime/primitive_type.h" +#include "runtime/type_limit.h" + +namespace lucene { +namespace store { +class Directory; +} // namespace store +namespace util::bkd { +class bkd_docid_set_iterator; +} // namespace util::bkd +} // namespace lucene +namespace roaring { +class Roaring; +} // namespace roaring + +namespace doris { +class KeyCoder; +class TypeInfo; +struct OlapReaderStatistics; +class RuntimeState; +enum class PredicateType; + +namespace segment_v2 { + +enum class QueryCategory { POINT_QUERY, RANGE_QUERY }; + +class InvertedIndexQueryBase { Review Comment: InvertedIndexQueryParam may be a better name ########## be/src/olap/column_predicate.h: ########## @@ 
-52,6 +52,7 @@ enum class PredicateType { BF = 11, // BloomFilter BITMAP_FILTER = 12, // BitmapFilter MATCH = 13, // fulltext match + RANGE = 14, // BKD index range search Review Comment: Should it be checked in is_range() function? ########## be/src/olap/rowset/segment_v2/inverted_index/query/inverted_index_query.h: ########## @@ -0,0 +1,220 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include <CLucene/util/FutureArrays.h> +#include <CLucene/util/bkd/bkd_reader.h> + +#include <memory> +#include <string> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/status.h" +#include "io/fs/file_system.h" +#include "io/fs/path.h" +#include "olap/inverted_index_parser.h" +#include "olap/rowset/segment_v2/inverted_index_cache.h" +#include "olap/rowset/segment_v2/inverted_index_compound_reader.h" +#include "olap/rowset/segment_v2/inverted_index_desc.h" +#include "olap/rowset/segment_v2/inverted_index_query_type.h" +#include "olap/tablet_schema.h" +#include "runtime/primitive_type.h" +#include "runtime/type_limit.h" + +namespace lucene { +namespace store { +class Directory; +} // namespace store +namespace util::bkd { +class bkd_docid_set_iterator; +} // namespace util::bkd +} // namespace lucene +namespace roaring { +class Roaring; +} // namespace roaring + +namespace doris { +class KeyCoder; +class TypeInfo; +struct OlapReaderStatistics; +class RuntimeState; +enum class PredicateType; + +namespace segment_v2 { + +enum class QueryCategory { POINT_QUERY, RANGE_QUERY }; Review Comment: one more type ########## be/src/olap/rowset/segment_v2/inverted_index_query_type.h: ########## @@ -32,8 +32,22 @@ enum class InvertedIndexQueryType { MATCH_ANY_QUERY = 5, MATCH_ALL_QUERY = 6, MATCH_PHRASE_QUERY = 7, + RANGE_QUERY = 8, }; +inline bool is_range_query(InvertedIndexQueryType query_type) { Review Comment: suggest name: is_half_range_query ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { + for (auto& term_doc : _term_docs) { + if (term_doc) { + _CLDELETE(term_doc); + } + } +} + +Status RangeQuery::add(const std::wstring& field_name, InvertedIndexRangeQueryI* query) { + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> lower_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> upper_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + + if (query->low_value_is_null() && query->high_value_is_null()) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "StringTypeInvertedIndexReader::handle_range_query error: both low_value and " + "high_value is null"); + } + auto search_low = query->get_low_value(); + if (!query->low_value_is_null()) { + std::wstring search_low_ws = StringUtil::string_to_wstring(search_low); + lower_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_low_ws.c_str())); + } else { + lower_term.reset(_CLNEW Term(field_name.c_str(), L"")); + } + auto search_high = query->get_high_value(); + if (!query->high_value_is_null()) { + std::wstring search_high_ws = StringUtil::string_to_wstring(search_high); + upper_term.reset(_CLNEW 
lucene::index::Term(field_name.c_str(), search_high_ws.c_str())); + } + + auto* _enumerator = _reader->terms(lower_term.get()); + Term* lastTerm = nullptr; + try { + bool checkLower = false; + if (!query->is_low_value_inclusive()) { // make adjustments to set to exclusive + checkLower = true; + } + + do { + lastTerm = _enumerator->term(); + if (lastTerm != nullptr && lastTerm->field() == field_name) { + if (!checkLower || _tcscmp(lastTerm->text(), lower_term->text()) > 0) { + checkLower = false; + if (upper_term != nullptr) { + int compare = _tcscmp(upper_term->text(), lastTerm->text()); + /* if beyond the upper term, or is exclusive and + * this is equal to the upper term, break out */ + if ((compare < 0) || (!query->is_high_value_inclusive() && compare == 0)) { + break; + } + } + TermDocs* term_doc = _reader->termDocs(lastTerm); + _term_docs.push_back(term_doc); + _term_iterators.emplace_back(term_doc); Review Comment: why put term_doc into two vectors? ########## be/src/olap/comparison_predicate.h: ########## @@ -231,6 +209,51 @@ class ComparisonPredicateBase : public ColumnPredicate { return PT == PredicateType::EQ && !ngram; } + void set_inverted_index_query_value(std::unique_ptr<InvertedIndexQueryBase>& query_value, + const Schema& schema) const override { + if (query_value == nullptr) { + auto column_desc = schema.column(_column_id); + if constexpr (PT == PredicateType::EQ || PT == PredicateType::NE) { + query_value = std::make_unique<InvertedIndexPointQuery<Type, PT>>( + column_desc->type_info()); + } else { + query_value = std::make_unique<InvertedIndexRangeQuery<Type, PredicateType::RANGE>>( + column_desc->type_info()); + } + } + if constexpr (PT == PredicateType::EQ || PT == PredicateType::NE) { + auto q = static_cast<InvertedIndexPointQuery<Type, PT>*>(query_value.get()); + q->add_value(_value, InvertedIndexQueryType::EQUAL_QUERY); + } else { + InvertedIndexQueryType query_type = InvertedIndexQueryType::UNKNOWN_QUERY; + switch (PT) { + case 
PredicateType::EQ: + query_type = InvertedIndexQueryType::EQUAL_QUERY; Review Comment: EQUAL_QUERY appears in both the EQ/NE branch and the else branch. Maybe we can unify InvertedIndexPointQuery into InvertedIndexRangeQuery. ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -426,6 +370,27 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run } } +Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, + const std::string& column_name, + InvertedIndexQueryBase* query_value, roaring::Roaring* bit_map) { + SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); + const auto& tmp = static_cast<InvertedIndexPointQueryI*>(query_value); + auto values = tmp->get_values(); + auto query_type = tmp->get_query_type(); + auto query_bitmap = std::make_shared<roaring::Roaring>(); + + for (auto it = values.begin(); it != values.end(); ++it) { + RETURN_IF_ERROR( + _query(stats, runtime_state, column_name, *it, query_type, query_bitmap.get())); + if (it == values.begin()) { + *bit_map = *query_bitmap; + } else { + *bit_map |= *query_bitmap; Review Comment: Is it always safe to assume that the relation between the values is OR? ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { + for (auto& term_doc : _term_docs) { + if (term_doc) { + _CLDELETE(term_doc); + } + } +} + +Status RangeQuery::add(const std::wstring& field_name, InvertedIndexRangeQueryI* query) { + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> lower_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> upper_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + + if (query->low_value_is_null() && query->high_value_is_null()) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "StringTypeInvertedIndexReader::handle_range_query error: both low_value and " + "high_value is null"); + } + auto search_low = query->get_low_value(); + if (!query->low_value_is_null()) { + std::wstring search_low_ws = StringUtil::string_to_wstring(search_low); + lower_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_low_ws.c_str())); + } else { + lower_term.reset(_CLNEW Term(field_name.c_str(), L"")); + } + auto search_high = query->get_high_value(); + if (!query->high_value_is_null()) { + std::wstring search_high_ws = StringUtil::string_to_wstring(search_high); + upper_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_high_ws.c_str())); + } + + auto* _enumerator = _reader->terms(lower_term.get()); + Term* lastTerm = nullptr; + try { + bool checkLower = 
false; + if (!query->is_low_value_inclusive()) { // make adjustments to set to exclusive + checkLower = true; + } + + do { + lastTerm = _enumerator->term(); + if (lastTerm != nullptr && lastTerm->field() == field_name) { + if (!checkLower || _tcscmp(lastTerm->text(), lower_term->text()) > 0) { + checkLower = false; + if (upper_term != nullptr) { + int compare = _tcscmp(upper_term->text(), lastTerm->text()); + /* if beyond the upper term, or is exclusive and + * this is equal to the upper term, break out */ + if ((compare < 0) || (!query->is_high_value_inclusive() && compare == 0)) { + break; + } + } + TermDocs* term_doc = _reader->termDocs(lastTerm); + _term_docs.push_back(term_doc); + _term_iterators.emplace_back(term_doc); + } + } else { + break; + } + _CLDECDELETE(lastTerm); + } while (_enumerator->next()); + } catch (CLuceneError& e) { + _CLDECDELETE(lastTerm); + _enumerator->close(); + _CLDELETE(_enumerator); + return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( + "CLuceneError occured, error msg: {}, search_str: {}", e.what(), + query->to_string()); + } + _CLDECDELETE(lastTerm); + _enumerator->close(); + _CLDELETE(_enumerator); + return Status::OK(); +} + +void RangeQuery::search(roaring::Roaring& roaring) { + roaring::Roaring result; + auto func = [&roaring](const TermIterator& term_docs, bool first) { + roaring::Roaring result; + DocRange doc_range; + while (term_docs.readRange(&doc_range)) { + if (doc_range.type_ == DocRangeType::kMany) { + result.addMany(doc_range.doc_many_size_, doc_range.doc_many->data()); + } else { + result.addRange(doc_range.doc_range.first, doc_range.doc_range.second); + } + } + if (first) { + roaring.swap(result); + } else { + roaring |= result; + } + }; + for (int i = 0; i < _term_iterators.size(); i++) { + auto& iter = _term_iterators[i]; + if (i == 0) { Review Comment: combine to func(iter, i == 0), and then the lambda is not necessary if it's only used once. 
########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -193,7 +168,7 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach auto index_file_path = index_dir / index_file_name; Review Comment: can be replaced by class member _file_full_path ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { + for (auto& term_doc : _term_docs) { + if (term_doc) { + _CLDELETE(term_doc); + } + } +} + +Status RangeQuery::add(const std::wstring& field_name, InvertedIndexRangeQueryI* query) { + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> lower_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> upper_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + + if (query->low_value_is_null() && query->high_value_is_null()) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "StringTypeInvertedIndexReader::handle_range_query error: both low_value and " + "high_value is null"); + } + auto search_low = query->get_low_value(); + if (!query->low_value_is_null()) { + std::wstring search_low_ws = StringUtil::string_to_wstring(search_low); + lower_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_low_ws.c_str())); + } else { + lower_term.reset(_CLNEW Term(field_name.c_str(), L"")); + } + auto search_high = query->get_high_value(); + if (!query->high_value_is_null()) { + std::wstring search_high_ws = StringUtil::string_to_wstring(search_high); + upper_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_high_ws.c_str())); + } + + auto* _enumerator = _reader->terms(lower_term.get()); + Term* lastTerm = nullptr; + try { + bool checkLower = false; + if (!query->is_low_value_inclusive()) { // make adjustments to set to exclusive + checkLower = true; + } + + do { + lastTerm = _enumerator->term(); + if (lastTerm != nullptr && lastTerm->field() == field_name) { + if (!checkLower || _tcscmp(lastTerm->text(), lower_term->text()) > 0) { + checkLower = false; + if (upper_term != nullptr) { + int compare = _tcscmp(upper_term->text(), lastTerm->text()); + /* if beyond the 
upper term, or is exclusive and + * this is equal to the upper term, break out */ + if ((compare < 0) || (!query->is_high_value_inclusive() && compare == 0)) { + break; + } + } + TermDocs* term_doc = _reader->termDocs(lastTerm); + _term_docs.push_back(term_doc); + _term_iterators.emplace_back(term_doc); Review Comment: what's the difference? ########## be/src/olap/rowset/segment_v2/inverted_index/query/inverted_index_query.cpp: ########## @@ -0,0 +1,314 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "inverted_index_query.h" + +#include <filesystem> +#include <set> + +#include "io/fs/file_system.h" +#include "olap/column_predicate.h" +#include "olap/key_coder.h" +#include "olap/olap_common.h" +#include "olap/rowset/segment_v2/inverted_index_cache.h" +#include "olap/types.h" +#include "util/time.h" +#include "vec/common/string_ref.h" + +namespace doris::segment_v2 { + +template <PrimitiveType Type, PredicateType PT> +Status Helper<Type, PT>::create_and_add_value(const TypeInfo* type_info, char* value, + InvertedIndexQueryType t, + std::unique_ptr<InvertedIndexQueryBase>& result) { + using CppType = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType; + + if (is_range_query(t)) { + auto range_query_ptr = std::make_unique<InvertedIndexRangeQuery<Type, PT>>(type_info); + RETURN_IF_ERROR(range_query_ptr->add_value(*reinterpret_cast<CppType*>(value), t)); + result = std::move(range_query_ptr); + } else { + auto point_query_ptr = std::make_unique<InvertedIndexPointQuery<Type, PT>>(type_info); + RETURN_IF_ERROR(point_query_ptr->add_value(*reinterpret_cast<CppType*>(value), t)); + result = std::move(point_query_ptr); + } + + return Status::OK(); +} + +template <PredicateType PT> +Status InvertedIndexQueryBase::create_and_add_value_from_field_type( + const TypeInfo* type_info, char* value, InvertedIndexQueryType t, + std::unique_ptr<InvertedIndexQueryBase>& result) { + Status st; + switch (type_info->type()) { Review Comment: switch case for type is deprecated. ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { + for (auto& term_doc : _term_docs) { + if (term_doc) { + _CLDELETE(term_doc); + } + } +} + +Status RangeQuery::add(const std::wstring& field_name, InvertedIndexRangeQueryI* query) { + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> lower_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> upper_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + + if (query->low_value_is_null() && query->high_value_is_null()) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "StringTypeInvertedIndexReader::handle_range_query error: both low_value and " + "high_value is null"); + } + auto search_low = query->get_low_value(); + if (!query->low_value_is_null()) { + std::wstring search_low_ws = StringUtil::string_to_wstring(search_low); + lower_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_low_ws.c_str())); + } else { + lower_term.reset(_CLNEW Term(field_name.c_str(), L"")); + } + auto search_high = query->get_high_value(); + if (!query->high_value_is_null()) { + std::wstring search_high_ws = StringUtil::string_to_wstring(search_high); + upper_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_high_ws.c_str())); + } + + auto* _enumerator = _reader->terms(lower_term.get()); + Term* lastTerm = nullptr; + try { + bool checkLower = 
false; + if (!query->is_low_value_inclusive()) { // make adjustments to set to exclusive + checkLower = true; + } + + do { + lastTerm = _enumerator->term(); + if (lastTerm != nullptr && lastTerm->field() == field_name) { + if (!checkLower || _tcscmp(lastTerm->text(), lower_term->text()) > 0) { + checkLower = false; + if (upper_term != nullptr) { + int compare = _tcscmp(upper_term->text(), lastTerm->text()); + /* if beyond the upper term, or is exclusive and + * this is equal to the upper term, break out */ + if ((compare < 0) || (!query->is_high_value_inclusive() && compare == 0)) { + break; + } + } + TermDocs* term_doc = _reader->termDocs(lastTerm); + _term_docs.push_back(term_doc); + _term_iterators.emplace_back(term_doc); + } + } else { + break; + } + _CLDECDELETE(lastTerm); + } while (_enumerator->next()); + } catch (CLuceneError& e) { + _CLDECDELETE(lastTerm); + _enumerator->close(); + _CLDELETE(_enumerator); + return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( + "CLuceneError occured, error msg: {}, search_str: {}", e.what(), + query->to_string()); + } + _CLDECDELETE(lastTerm); + _enumerator->close(); + _CLDELETE(_enumerator); + return Status::OK(); +} + +void RangeQuery::search(roaring::Roaring& roaring) { + roaring::Roaring result; + auto func = [&roaring](const TermIterator& term_docs, bool first) { Review Comment: term_docs is ambiguous to _term_docs ########## be/src/olap/rowset/segment_v2/segment_iterator.cpp: ########## @@ -933,24 +937,55 @@ Status SegmentIterator::_apply_inverted_index_on_block_column_predicate( std::string column_name = _schema->column(column_id)->name(); - auto res = pred->evaluate(column_name, _inverted_index_iterators[column_id].get(), - num_rows(), &output_result); + auto process_predicate_set = [&](const auto& predicate_set) { + for (auto& orig_pred : predicate_set) { + if (origin_to_clone_predicates.contains(orig_pred)) { + auto& cloned_pred = origin_to_clone_predicates[orig_pred]; + 
no_need_to_pass_column_predicate_set.emplace(cloned_pred); + } else { + LOG(ERROR) + << "column:" << column_name << " pred:" << orig_pred->debug_string() + << " is not in origin_to_clone_predicates when process_predicate_set"; + } + } + }; + + auto res = pred->evaluate(*_schema, _inverted_index_iterators[column_id].get(), num_rows(), + &output_result); if (res.ok()) { if (_check_column_pred_all_push_down(column_name) && !all_predicates_are_marked_by_runtime_filter(predicate_set)) { _need_read_data_indices[column_id] = false; } - no_need_to_pass_column_predicate_set.insert(predicate_set.begin(), predicate_set.end()); + process_predicate_set(predicate_set); + //no_need_to_pass_column_predicate_set.insert(predicate_set.begin(), predicate_set.end()); _row_bitmap &= output_result; if (_row_bitmap.isEmpty()) { // all rows have been pruned, no need to process further predicates *continue_apply = false; } return res; } else { - //TODO:mock until AndBlockColumnPredicate evaluate is ok. - if (res.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) { + // because column predicate only process LE/LT/GT/GE predicate type, need_remaining_after_evaluate only support in_or_list + bool need_remaining_after_evaluate = false; + if (_downgrade_without_index(res, need_remaining_after_evaluate)) { + // downgrade without index query + //process_predicate_set(predicate_set); + // need to pass non-index evaluate after + for (auto& orig_pred : predicate_set) { Review Comment: duplicate with process_predicate_set() ########## be/src/olap/rowset/segment_v2/inverted_index/query/inverted_index_query.cpp: ########## @@ -0,0 +1,314 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "inverted_index_query.h" + +#include <filesystem> +#include <set> + +#include "io/fs/file_system.h" +#include "olap/column_predicate.h" +#include "olap/key_coder.h" +#include "olap/olap_common.h" +#include "olap/rowset/segment_v2/inverted_index_cache.h" +#include "olap/types.h" +#include "util/time.h" +#include "vec/common/string_ref.h" + +namespace doris::segment_v2 { + +template <PrimitiveType Type, PredicateType PT> +Status Helper<Type, PT>::create_and_add_value(const TypeInfo* type_info, char* value, + InvertedIndexQueryType t, + std::unique_ptr<InvertedIndexQueryBase>& result) { + using CppType = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType; + + if (is_range_query(t)) { + auto range_query_ptr = std::make_unique<InvertedIndexRangeQuery<Type, PT>>(type_info); + RETURN_IF_ERROR(range_query_ptr->add_value(*reinterpret_cast<CppType*>(value), t)); + result = std::move(range_query_ptr); + } else { + auto point_query_ptr = std::make_unique<InvertedIndexPointQuery<Type, PT>>(type_info); + RETURN_IF_ERROR(point_query_ptr->add_value(*reinterpret_cast<CppType*>(value), t)); + result = std::move(point_query_ptr); + } + + return Status::OK(); +} + +template <PredicateType PT> +Status InvertedIndexQueryBase::create_and_add_value_from_field_type( + const TypeInfo* type_info, char* value, InvertedIndexQueryType t, + std::unique_ptr<InvertedIndexQueryBase>& result) { 
+ Status st; + switch (type_info->type()) { + case FieldType::OLAP_FIELD_TYPE_DATETIME: { + st = Helper<PrimitiveType::TYPE_DATETIME, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DATE: { + st = Helper<PrimitiveType::TYPE_DATE, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: { + st = Helper<PrimitiveType::TYPE_DATETIMEV2, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DATEV2: { + st = Helper<PrimitiveType::TYPE_DATEV2, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_TINYINT: { + st = Helper<PrimitiveType::TYPE_TINYINT, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_SMALLINT: { + st = Helper<PrimitiveType::TYPE_SMALLINT, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_INT: { + st = Helper<PrimitiveType::TYPE_INT, PT>::create_and_add_value(type_info, value, t, result); + break; + } + case FieldType::OLAP_FIELD_TYPE_LARGEINT: { + st = Helper<PrimitiveType::TYPE_LARGEINT, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DECIMAL32: { + st = Helper<PrimitiveType::TYPE_DECIMAL32, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DECIMAL64: { + st = Helper<PrimitiveType::TYPE_DECIMAL64, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: { + st = Helper<PrimitiveType::TYPE_DECIMAL128I, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DOUBLE: { + st = Helper<PrimitiveType::TYPE_DOUBLE, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case 
FieldType::OLAP_FIELD_TYPE_FLOAT: { + st = Helper<PrimitiveType::TYPE_FLOAT, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_BIGINT: { + st = Helper<PrimitiveType::TYPE_BIGINT, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_BOOL: { + st = Helper<PrimitiveType::TYPE_BOOLEAN, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_CHAR: { + st = Helper<PrimitiveType::TYPE_CHAR, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_VARCHAR: { + st = Helper<PrimitiveType::TYPE_VARCHAR, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_STRING: { + st = Helper<PrimitiveType::TYPE_STRING, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + default: + return Status::NotSupported("Unsupported column type for inverted index {}", + type_info->type()); + } + if (!st.ok()) { + return st; + } + return Status::OK(); +} + +template Status InvertedIndexQueryBase::create_and_add_value_from_field_type<PredicateType::MATCH>( + const TypeInfo*, char*, InvertedIndexQueryType, std::unique_ptr<InvertedIndexQueryBase>&); + +template <PrimitiveType Type, PredicateType PT> +InvertedIndexPointQuery<Type, PT>::InvertedIndexPointQuery(const TypeInfo* type_info) + : _type_info(type_info) { + _value_key_coder = get_key_coder(type_info->type()); +} + +template <PrimitiveType Type, PredicateType PT> +std::string InvertedIndexPointQuery<Type, PT>::to_string() { + std::string result; + if constexpr (std::is_same_v<T, StringRef>) { + for (const T* v : _values) { + result += v->to_string(); Review Comment: no separator ########## be/src/olap/rowset/segment_v2/inverted_index/query/inverted_index_query.cpp: ########## @@ -0,0 +1,314 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more 
contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "inverted_index_query.h" + +#include <filesystem> +#include <set> + +#include "io/fs/file_system.h" +#include "olap/column_predicate.h" +#include "olap/key_coder.h" +#include "olap/olap_common.h" +#include "olap/rowset/segment_v2/inverted_index_cache.h" +#include "olap/types.h" +#include "util/time.h" +#include "vec/common/string_ref.h" + +namespace doris::segment_v2 { + +template <PrimitiveType Type, PredicateType PT> +Status Helper<Type, PT>::create_and_add_value(const TypeInfo* type_info, char* value, + InvertedIndexQueryType t, + std::unique_ptr<InvertedIndexQueryBase>& result) { + using CppType = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType; + + if (is_range_query(t)) { + auto range_query_ptr = std::make_unique<InvertedIndexRangeQuery<Type, PT>>(type_info); + RETURN_IF_ERROR(range_query_ptr->add_value(*reinterpret_cast<CppType*>(value), t)); + result = std::move(range_query_ptr); + } else { + auto point_query_ptr = std::make_unique<InvertedIndexPointQuery<Type, PT>>(type_info); + RETURN_IF_ERROR(point_query_ptr->add_value(*reinterpret_cast<CppType*>(value), t)); + result = std::move(point_query_ptr); + } + + return Status::OK(); +} + +template <PredicateType PT> +Status 
InvertedIndexQueryBase::create_and_add_value_from_field_type( + const TypeInfo* type_info, char* value, InvertedIndexQueryType t, + std::unique_ptr<InvertedIndexQueryBase>& result) { + Status st; + switch (type_info->type()) { + case FieldType::OLAP_FIELD_TYPE_DATETIME: { + st = Helper<PrimitiveType::TYPE_DATETIME, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DATE: { + st = Helper<PrimitiveType::TYPE_DATE, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: { + st = Helper<PrimitiveType::TYPE_DATETIMEV2, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DATEV2: { + st = Helper<PrimitiveType::TYPE_DATEV2, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_TINYINT: { + st = Helper<PrimitiveType::TYPE_TINYINT, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_SMALLINT: { + st = Helper<PrimitiveType::TYPE_SMALLINT, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_INT: { + st = Helper<PrimitiveType::TYPE_INT, PT>::create_and_add_value(type_info, value, t, result); + break; + } + case FieldType::OLAP_FIELD_TYPE_LARGEINT: { + st = Helper<PrimitiveType::TYPE_LARGEINT, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DECIMAL32: { + st = Helper<PrimitiveType::TYPE_DECIMAL32, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DECIMAL64: { + st = Helper<PrimitiveType::TYPE_DECIMAL64, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: { + st = Helper<PrimitiveType::TYPE_DECIMAL128I, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case 
FieldType::OLAP_FIELD_TYPE_DOUBLE: { + st = Helper<PrimitiveType::TYPE_DOUBLE, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_FLOAT: { + st = Helper<PrimitiveType::TYPE_FLOAT, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_BIGINT: { + st = Helper<PrimitiveType::TYPE_BIGINT, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_BOOL: { + st = Helper<PrimitiveType::TYPE_BOOLEAN, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_CHAR: { + st = Helper<PrimitiveType::TYPE_CHAR, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_VARCHAR: { + st = Helper<PrimitiveType::TYPE_VARCHAR, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + case FieldType::OLAP_FIELD_TYPE_STRING: { + st = Helper<PrimitiveType::TYPE_STRING, PT>::create_and_add_value(type_info, value, t, + result); + break; + } + default: + return Status::NotSupported("Unsupported column type for inverted index {}", + type_info->type()); + } + if (!st.ok()) { + return st; + } + return Status::OK(); +} + +template Status InvertedIndexQueryBase::create_and_add_value_from_field_type<PredicateType::MATCH>( + const TypeInfo*, char*, InvertedIndexQueryType, std::unique_ptr<InvertedIndexQueryBase>&); + +template <PrimitiveType Type, PredicateType PT> +InvertedIndexPointQuery<Type, PT>::InvertedIndexPointQuery(const TypeInfo* type_info) + : _type_info(type_info) { + _value_key_coder = get_key_coder(type_info->type()); +} + +template <PrimitiveType Type, PredicateType PT> +std::string InvertedIndexPointQuery<Type, PT>::to_string() { + std::string result; + if constexpr (std::is_same_v<T, StringRef>) { + for (const T* v : _values) { + result += v->to_string(); + } + } else { + for (auto& v : _values) { + result += 
_type_info->to_string(v); + } + } + return result; +} + +template <PrimitiveType Type, PredicateType PT> +Status InvertedIndexPointQuery<Type, PT>::add_value(const T& value, InvertedIndexQueryType t) { + if constexpr (std::is_same_v<T, StringRef>) { + auto act_len = strnlen(value.data, value.size); + std::string value_str(value.data, act_len); + _values_encoded.push_back(value_str); + } else { + std::string tmp; + _value_key_coder->full_encode_ascending(&value, &tmp); + _values_encoded.push_back(tmp); + } + _values.push_back(&value); + _type = t; + return Status::OK(); +} + +template <PrimitiveType Type, PredicateType PT> +InvertedIndexRangeQuery<Type, PT>::InvertedIndexRangeQuery(const TypeInfo* type_info) + : _type_info(type_info) { + _value_key_coder = get_key_coder(type_info->type()); + auto max_v = type_limit<T>::max(); + auto min_v = type_limit<T>::min(); + _value_key_coder->full_encode_ascending(&max_v, &_high_value_encoded); + _value_key_coder->full_encode_ascending(&min_v, &_low_value_encoded); +} + +template <PrimitiveType Type, PredicateType PT> +std::string InvertedIndexRangeQuery<Type, PT>::to_string() { + std::string low_op = _inclusive_low ? ">=" : ">"; + std::string high_op = _inclusive_high ? 
"<=" : "<"; + std::string buffer; + if (_low_value != nullptr) { + buffer.append(_type_info->to_string(_low_value) + low_op + " "); + } + if (_high_value != nullptr) { + buffer.append(_type_info->to_string(_high_value) + high_op); + } + return buffer; +}; + +template <PrimitiveType Type, PredicateType PT> +Status InvertedIndexRangeQuery<Type, PT>::add_value(const T& value, InvertedIndexQueryType t) { + switch (t) { + case InvertedIndexQueryType::GREATER_THAN_QUERY: { + _low_value = &value; Review Comment: should check new value > old value and then replace ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.h: ########## @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include <CLucene.h> +#include <CLucene/index/IndexReader.h> +#include <CLucene/index/IndexVersion.h> +#include <CLucene/index/Term.h> +#include <CLucene/search/query/TermIterator.h> + +#include "inverted_index_query.h" +#include "roaring/roaring.hh" + +CL_NS_USE(index) + +namespace doris { +using namespace segment_v2; + +class RangeQuery { Review Comment: It's confusing that RangeQuery name is similar to InvertedIndexRangeQuery but there is no inheritance relationship between them ########## be/src/olap/column_predicate.h: ########## @@ -52,6 +52,7 @@ enum class PredicateType { BF = 11, // BloomFilter BITMAP_FILTER = 12, // BitmapFilter MATCH = 13, // fulltext match + RANGE = 14, // BKD index range search Review Comment: And in some switch case ########## be/src/olap/rowset/segment_v2/inverted_index/query/inverted_index_query.h: ########## @@ -0,0 +1,220 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include <CLucene/util/FutureArrays.h> +#include <CLucene/util/bkd/bkd_reader.h> + +#include <memory> +#include <string> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/status.h" +#include "io/fs/file_system.h" +#include "io/fs/path.h" +#include "olap/inverted_index_parser.h" +#include "olap/rowset/segment_v2/inverted_index_cache.h" +#include "olap/rowset/segment_v2/inverted_index_compound_reader.h" +#include "olap/rowset/segment_v2/inverted_index_desc.h" +#include "olap/rowset/segment_v2/inverted_index_query_type.h" +#include "olap/tablet_schema.h" +#include "runtime/primitive_type.h" +#include "runtime/type_limit.h" + +namespace lucene { +namespace store { +class Directory; +} // namespace store +namespace util::bkd { +class bkd_docid_set_iterator; +} // namespace util::bkd +} // namespace lucene +namespace roaring { +class Roaring; +} // namespace roaring + +namespace doris { +class KeyCoder; +class TypeInfo; +struct OlapReaderStatistics; +class RuntimeState; +enum class PredicateType; + +namespace segment_v2 { + +enum class QueryCategory { POINT_QUERY, RANGE_QUERY }; + +class InvertedIndexQueryBase { +public: + virtual std::string to_string() = 0; + virtual ~InvertedIndexQueryBase() = default; + virtual QueryCategory get_query_category() = 0; + [[nodiscard]] virtual PredicateType get_predicate_type() const = 0; + template <PredicateType PT> + static Status create_and_add_value_from_field_type( + const TypeInfo* type_info, char* value, InvertedIndexQueryType t, + std::unique_ptr<InvertedIndexQueryBase>& result); +}; + +template <PrimitiveType Type, PredicateType PT> +struct Helper; + +class InvertedIndexPointQueryI : public InvertedIndexQueryBase { +public: + InvertedIndexPointQueryI() = default; + ~InvertedIndexPointQueryI() override = default; + QueryCategory get_query_category() override { return QueryCategory::POINT_QUERY; } + std::string to_string() override { + LOG_FATAL("Execution reached an undefined 
behavior code path in InvertedIndexPointQueryI"); + __builtin_unreachable(); + } + [[nodiscard]] PredicateType get_predicate_type() const override { + LOG_FATAL("Execution reached an undefined behavior code path in InvertedIndexPointQueryI"); + __builtin_unreachable(); + } + [[nodiscard]] virtual const std::vector<std::string>& get_values() const { + LOG_FATAL("Execution reached an undefined behavior code path in InvertedIndexPointQueryI"); + __builtin_unreachable(); + }; + [[nodiscard]] virtual InvertedIndexQueryType get_query_type() const { + LOG_FATAL("Execution reached an undefined behavior code path in InvertedIndexPointQueryI"); + __builtin_unreachable(); + }; +}; + +template <PrimitiveType Type, PredicateType PT> +class InvertedIndexPointQuery : public InvertedIndexPointQueryI { +public: + using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType; + InvertedIndexPointQuery(const TypeInfo* type_info); + + Status add_value(const T& value, InvertedIndexQueryType t); + std::string to_string() override; + [[nodiscard]] const std::vector<std::string>& get_values() const override { + return _values_encoded; + }; + [[nodiscard]] PredicateType get_predicate_type() const override { return PT; }; + [[nodiscard]] InvertedIndexQueryType get_query_type() const override { return _type; }; + +private: + std::vector<std::string> _values_encoded; + const KeyCoder* _value_key_coder {}; + const TypeInfo* _type_info {}; + std::vector<const T*> _values; + InvertedIndexQueryType _type; +}; + +template <PrimitiveType Type, PredicateType PT> +struct Helper { + static Status create_and_add_value(const TypeInfo* type_info, char* value, + InvertedIndexQueryType t, + std::unique_ptr<InvertedIndexQueryBase>& result); +}; + +class InvertedIndexRangeQueryI : public InvertedIndexQueryBase { +public: + InvertedIndexRangeQueryI() = default; + ~InvertedIndexRangeQueryI() override = default; + [[nodiscard]] virtual const std::string& get_low_value() const = 0; + [[nodiscard]] 
virtual const std::string& get_high_value() const = 0; + virtual bool low_value_is_null() = 0; + virtual bool high_value_is_null() = 0; + QueryCategory get_query_category() override { return QueryCategory::RANGE_QUERY; } + std::string to_string() override { + LOG_FATAL("Execution reached an undefined behavior code path in InvertedIndexRangeQueryI"); + __builtin_unreachable(); + }; + [[nodiscard]] PredicateType get_predicate_type() const override { + LOG_FATAL("Execution reached an undefined behavior code path in InvertedIndexRangeQueryI"); + __builtin_unreachable(); + }; + [[nodiscard]] virtual bool is_low_value_inclusive() const { + LOG_FATAL("Execution reached an undefined behavior code path in InvertedIndexRangeQueryI"); + __builtin_unreachable(); + } + [[nodiscard]] virtual bool is_high_value_inclusive() const { + LOG_FATAL("Execution reached an undefined behavior code path in InvertedIndexRangeQueryI"); + __builtin_unreachable(); + } +}; + +class BinaryType { Review Comment: BinaryValue may be better ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { + for (auto& term_doc : _term_docs) { + if (term_doc) { + _CLDELETE(term_doc); + } + } +} + +Status RangeQuery::add(const std::wstring& field_name, InvertedIndexRangeQueryI* query) { + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> lower_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> upper_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + + if (query->low_value_is_null() && query->high_value_is_null()) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "StringTypeInvertedIndexReader::handle_range_query error: both low_value and " + "high_value is null"); + } + auto search_low = query->get_low_value(); + if (!query->low_value_is_null()) { + std::wstring search_low_ws = StringUtil::string_to_wstring(search_low); + lower_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_low_ws.c_str())); + } else { + lower_term.reset(_CLNEW Term(field_name.c_str(), L"")); + } + auto search_high = query->get_high_value(); + if (!query->high_value_is_null()) { + std::wstring search_high_ws = StringUtil::string_to_wstring(search_high); + upper_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_high_ws.c_str())); + } + + auto* _enumerator = _reader->terms(lower_term.get()); + Term* lastTerm = nullptr; + try { + bool checkLower = false; + if (!query->is_low_value_inclusive()) { // make adjustments to set to exclusive + checkLower = true; + } + + do { + lastTerm = _enumerator->term(); + if (lastTerm != nullptr && lastTerm->field() == field_name) { + if (!checkLower || _tcscmp(lastTerm->text(), lower_term->text()) > 0) { + checkLower = false; + if (upper_term != nullptr) { + int compare = _tcscmp(upper_term->text(), lastTerm->text()); + /* if beyond the 
upper term, or is exclusive and + * this is equal to the upper term, break out */ + if ((compare < 0) || (!query->is_high_value_inclusive() && compare == 0)) { + break; + } + } + TermDocs* term_doc = _reader->termDocs(lastTerm); + _term_docs.push_back(term_doc); + _term_iterators.emplace_back(term_doc); Review Comment: can we process term_doc one by one and release term_doc after search to avoid too many open term_doc. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org