github-actions[bot] commented on code in PR #24511: URL: https://github.com/apache/doris/pull/24511#discussion_r1366695701
########## be/src/olap/column_predicate.h: ########## @@ -208,6 +209,14 @@ class ColumnPredicate { virtual void set_page_ng_bf(std::unique_ptr<segment_v2::BloomFilter>) { DCHECK(false) << "should not reach here"; } + + virtual Status set_inverted_index_query_value(std::unique_ptr<InvertedIndexQueryBase>&, + const Schema&) const { Review Comment: warning: all parameters should be named in a function [readability-named-parameter] ```suggestion virtual Status set_inverted_index_query_value(std::unique_ptr<InvertedIndexQueryBase>& /*unused*/, const Schema& /*unused*/) const { ``` ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { + for (auto& term_doc : _term_docs) { + if (term_doc) { + _CLDELETE(term_doc); + } + } +} + +Status RangeQuery::add(const std::wstring& field_name, InvertedIndexRangeQueryI* query) { + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> lower_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> upper_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + + if (query->low_value_is_null() && query->high_value_is_null()) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "StringTypeInvertedIndexReader::handle_range_query error: both low_value and " + "high_value is null"); + } + auto search_low = query->get_low_value(); + if (!query->low_value_is_null()) { + std::wstring search_low_ws = StringUtil::string_to_wstring(search_low); + lower_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_low_ws.c_str())); + } else { + lower_term.reset(_CLNEW Term(field_name.c_str(), L"")); + } + auto search_high = query->get_high_value(); + if (!query->high_value_is_null()) { + std::wstring search_high_ws = StringUtil::string_to_wstring(search_high); + upper_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_high_ws.c_str())); + } + + auto _enumerator = _reader->terms(lower_term.get()); + Term* lastTerm = NULL; Review Comment: warning: use nullptr [modernize-use-nullptr] ```suggestion Term* lastTerm = nullptr; ``` ########## be/src/olap/rowset/segment_v2/inverted_index/query/inverted_index_query.cpp: ########## @@ -0,0 +1,313 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "inverted_index_query.h" + +#include <string.h> Review Comment: warning: inclusion of deprecated C++ header 'string.h'; consider using 'cstring' instead [modernize-deprecated-headers] ```suggestion #include <cstring> ``` ########## be/src/olap/rowset/segment_v2/inverted_index/query/inverted_index_query.h: ########## @@ -0,0 +1,223 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <CLucene/util/FutureArrays.h> +#include <CLucene/util/bkd/bkd_reader.h> +#include <stdint.h> Review Comment: warning: inclusion of deprecated C++ header 'stdint.h'; consider using 'cstdint' instead [modernize-deprecated-headers] ```suggestion #include <cstdint> ``` ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -578,69 +503,115 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, read_null_bitmap(&null_bitmap_cache_handle, index_searcher->getReader()->directory())); try { - if (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY || - query_type == InvertedIndexQueryType::MATCH_ALL_QUERY || - query_type == InvertedIndexQueryType::EQUAL_QUERY) { - SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); - index_searcher->_search(query.get(), [&result](DocRange* doc_range) { - if (doc_range->type_ == DocRangeType::kMany) { - result.addMany(doc_range->doc_many_size_, doc_range->doc_many->data()); - } else { - result.addRange(doc_range->doc_range.first, doc_range->doc_range.second); - } - }); - } else { - SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); - index_searcher->_search(query.get(), - [&result](const int32_t docid, const float_t /*score*/) { - // docid equal to rowid in segment - result.add(docid); - }); - } + SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); + range_query.search(result); + } catch (const CLuceneError& e) { - if (_is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses) { + if (e.number() == CL_ERR_TooManyClauses) { return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>( "range query term exceeds limits, try to downgrade from inverted index, column " "name:{}, search_str:{}", - column_name, search_str); + column_name, query->to_string()); } else { return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( "CLuceneError occured, error msg: {}, column name: {}, search_str: {}", - e.what(), column_name, search_str); + e.what(), column_name, query->to_string()); } } + // add to cache + std::shared_ptr<roaring::Roaring> range_query_bitmap = + std::make_shared<roaring::Roaring>(result); + range_query_bitmap->runOptimize(); + cache->insert(cache_key, range_query_bitmap, &cache_handler); + + bit_map->swap(result); + return Status::OK(); +} + +Status StringTypeInvertedIndexReader::handle_point_query(const std::string& column_name, + OlapReaderStatistics* stats, + InvertedIndexPointQueryI* query, + roaring::Roaring* bit_map) { + std::wstring column_name_ws = std::wstring(column_name.begin(), column_name.end()); + auto values = query->get_values(); + + InvertedIndexQueryCache::CacheKey cache_key {_file_full_path, column_name, + query->get_query_type(), query->to_string()}; + auto cache = InvertedIndexQueryCache::instance(); Review Comment: warning: 'auto cache' can be declared as 'auto *cache' [readability-qualified-auto] ```suggestion auto *cache = InvertedIndexQueryCache::instance(); ``` ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { + for (auto& term_doc : _term_docs) { + if (term_doc) { + _CLDELETE(term_doc); + } + } +} + +Status RangeQuery::add(const std::wstring& field_name, InvertedIndexRangeQueryI* query) { + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> lower_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> upper_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + + if (query->low_value_is_null() && query->high_value_is_null()) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "StringTypeInvertedIndexReader::handle_range_query error: both low_value and " + "high_value is null"); + } + auto search_low = query->get_low_value(); + if (!query->low_value_is_null()) { + std::wstring search_low_ws = StringUtil::string_to_wstring(search_low); + lower_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_low_ws.c_str())); + } else { + lower_term.reset(_CLNEW Term(field_name.c_str(), L"")); + } + auto search_high = query->get_high_value(); + if (!query->high_value_is_null()) { + std::wstring search_high_ws = StringUtil::string_to_wstring(search_high); + upper_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_high_ws.c_str())); + } + + auto _enumerator = _reader->terms(lower_term.get()); + Term* lastTerm = NULL; + try { + bool checkLower = false; + if (!query->is_low_value_inclusive()) // make adjustments to set to exclusive + checkLower = true; + + do { + lastTerm = _enumerator->term(); + if (lastTerm != NULL && lastTerm->field() == field_name) { + if (!checkLower || _tcscmp(lastTerm->text(), lower_term->text()) > 0) { + checkLower = false; + if (upper_term != NULL) { + int compare = _tcscmp(upper_term->text(), lastTerm->text()); + /* if beyond the upper term, or is exclusive and + * this is equal to the upper term, break out */ + if ((compare < 0) || (!query->is_high_value_inclusive() && compare == 0)) + break; + } + TermDocs* term_doc = _reader->termDocs(lastTerm); + _term_docs.push_back(term_doc); + _term_iterators.emplace_back(term_doc); + } + } else { + break; + } + _CLDECDELETE(lastTerm); + } while (_enumerator->next()); + } catch (CLuceneError& e) { + _CLDECDELETE(lastTerm); + _enumerator->close(); + _CLDELETE(_enumerator); + return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( + "CLuceneError occured, error msg: {}, search_str: {}", e.what(), + query->to_string()); + } + _CLDECDELETE(lastTerm); + _enumerator->close(); + _CLDELETE(_enumerator); + return Status::OK(); +} + +void RangeQuery::search(roaring::Roaring& roaring) { Review Comment: warning: method 'search' can be made static [readability-convert-member-functions-to-static] be/src/olap/rowset/segment_v2/inverted_index/query/range_query.h:39: ```diff - void search(roaring::Roaring& roaring); + static void search(roaring::Roaring& roaring); ``` ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -413,6 +366,27 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run } } +Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, Review Comment: warning: method 'query' can be made static [readability-convert-member-functions-to-static] ```suggestion static Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, ``` ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -827,47 +752,66 @@ InvertedIndexReaderType BkdIndexReader::type() { return InvertedIndexReaderType::BKD; } -InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, InvertedIndexQueryType query_type, +InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, InvertedIndexQueryBase* query_value, bool only_count) - : _hits(h), _num_hits(0), _only_count(only_count), _query_type(query_type) {} + : _hits(h), + _num_hits(0), + _only_count(only_count), + _low_op(PredicateType::GT), + _high_op(PredicateType::LT) { + if (query_value->get_query_category() == QueryCategory::RANGE_QUERY) { + auto range_query = reinterpret_cast<InvertedIndexRangeQueryI*>(query_value); + query_max = range_query->get_high_value(); + query_min = range_query->get_low_value(); + if (range_query->is_low_value_inclusive()) { + _low_op = PredicateType::GE; + } + if (range_query->is_high_value_inclusive()) { + _high_op = PredicateType::LE; + } + } else if (query_value->get_query_category() == QueryCategory::POINT_QUERY) { + auto point_query = reinterpret_cast<InvertedIndexPointQueryI*>(query_value); Review Comment: warning: 'auto point_query' can be declared as 'auto *point_query' [readability-qualified-auto] ```suggestion auto *point_query = reinterpret_cast<InvertedIndexPointQueryI*>(query_value); ``` ########## be/src/olap/rowset/segment_v2/inverted_index_reader.h: ########## @@ -184,14 +230,19 @@ class InvertedIndexVisitor : public lucene::util::bkd::bkd_reader::intersect_vis uint32_t _num_hits; bool _only_count; lucene::util::bkd::bkd_reader* _reader; - InvertedIndexQueryType _query_type; + PredicateType _low_op; + PredicateType _high_op; public: - std::string query_min; - std::string query_max; + BinaryType query_min; + BinaryType query_max; + std::vector<BinaryType> query_points; + //std::string query_min {}; + //std::string query_max {}; + //std::vector<std::string> query_points {}; public: - InvertedIndexVisitor(roaring::Roaring* hits, InvertedIndexQueryType query_type, + InvertedIndexVisitor(roaring::Roaring* hits, InvertedIndexQueryBase* range_query, Review Comment: warning: function 'std::doris::segment_v2::InvertedIndexVisitor::InvertedIndexVisitor' has a definition with different parameter names [readability-inconsistent-declaration-parameter-name] ```cpp InvertedIndexVisitor(roaring::Roaring* hits, InvertedIndexQueryBase* range_query, ^ ``` <details> <summary>Additional context</summary> **be/src/olap/rowset/segment_v2/inverted_index_reader.cpp:754:** the definition seen here ```cpp InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, InvertedIndexQueryBase* query_value, ^ ``` **be/src/olap/rowset/segment_v2/inverted_index_reader.h:244:** differing parameters are named here: ('range_query'), in definition: ('query_value') ```cpp InvertedIndexVisitor(roaring::Roaring* hits, InvertedIndexQueryBase* range_query, ^ ``` </details> ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -650,50 +621,27 @@ Status BkdIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* r return Status::OK(); } -Status BkdIndexReader::bkd_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, - std::shared_ptr<lucene::util::bkd::bkd_reader> r, +Status BkdIndexReader::bkd_query(std::shared_ptr<lucene::util::bkd::bkd_reader> r, Review Comment: warning: method 'bkd_query' can be made static [readability-convert-member-functions-to-static] ```suggestion static Status BkdIndexReader::bkd_query(std::shared_ptr<lucene::util::bkd::bkd_reader> r, ``` ########## be/src/olap/rowset/segment_v2/inverted_index/query/inverted_index_query.h: ########## @@ -0,0 +1,223 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <CLucene/util/FutureArrays.h> +#include <CLucene/util/bkd/bkd_reader.h> +#include <stdint.h> + +#include <memory> +#include <string> +#include <type_traits> +#include <utility> +#include <vector> + +#include "common/status.h" +#include "io/fs/file_system.h" +#include "io/fs/path.h" +#include "olap/inverted_index_parser.h" +#include "olap/rowset/segment_v2/inverted_index_cache.h" +#include "olap/rowset/segment_v2/inverted_index_compound_reader.h" +#include "olap/rowset/segment_v2/inverted_index_desc.h" +#include "olap/rowset/segment_v2/inverted_index_query_type.h" +#include "olap/tablet_schema.h" +#include "runtime/primitive_type.h" +#include "runtime/type_limit.h" + +namespace lucene { +namespace store { +class Directory; +} // namespace store +namespace util { +namespace bkd { Review Comment: warning: nested namespaces can be concatenated [modernize-concat-nested-namespaces] ```suggestion namespace util::bkd { ``` be/src/olap/rowset/segment_v2/inverted_index/query/inverted_index_query.h:48: ```diff - } // namespace bkd - } // namespace util + } // namespace util ``` ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { + for (auto& term_doc : _term_docs) { + if (term_doc) { + _CLDELETE(term_doc); + } + } +} + +Status RangeQuery::add(const std::wstring& field_name, InvertedIndexRangeQueryI* query) { + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> lower_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> upper_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + + if (query->low_value_is_null() && query->high_value_is_null()) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "StringTypeInvertedIndexReader::handle_range_query error: both low_value and " + "high_value is null"); + } + auto search_low = query->get_low_value(); + if (!query->low_value_is_null()) { + std::wstring search_low_ws = StringUtil::string_to_wstring(search_low); + lower_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_low_ws.c_str())); + } else { + lower_term.reset(_CLNEW Term(field_name.c_str(), L"")); + } + auto search_high = query->get_high_value(); + if (!query->high_value_is_null()) { + std::wstring search_high_ws = StringUtil::string_to_wstring(search_high); + upper_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_high_ws.c_str())); + } + + auto _enumerator = _reader->terms(lower_term.get()); + Term* lastTerm = NULL; + try { + bool checkLower = false; + if (!query->is_low_value_inclusive()) // make adjustments to set to exclusive + checkLower = true; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if (!query->is_low_value_inclusive()) { // make adjustments to set to exclusive checkLower = true; } ``` ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { Review Comment: warning: use '= default' to define a trivial destructor [modernize-use-equals-default] ```cpp RangeQuery::~RangeQuery() { ^ ``` ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -827,47 +752,66 @@ InvertedIndexReaderType BkdIndexReader::type() { return InvertedIndexReaderType::BKD; } -InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, InvertedIndexQueryType query_type, +InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, InvertedIndexQueryBase* query_value, bool only_count) - : _hits(h), _num_hits(0), _only_count(only_count), _query_type(query_type) {} + : _hits(h), + _num_hits(0), + _only_count(only_count), + _low_op(PredicateType::GT), + _high_op(PredicateType::LT) { + if (query_value->get_query_category() == QueryCategory::RANGE_QUERY) { + auto range_query = reinterpret_cast<InvertedIndexRangeQueryI*>(query_value); Review Comment: warning: 'auto range_query' can be declared as 'auto *range_query' [readability-qualified-auto] ```suggestion auto *range_query = reinterpret_cast<InvertedIndexRangeQueryI*>(query_value); ``` ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -827,47 +752,66 @@ InvertedIndexReaderType BkdIndexReader::type() { return InvertedIndexReaderType::BKD; } -InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, InvertedIndexQueryType query_type, +InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, InvertedIndexQueryBase* query_value, bool only_count) - : _hits(h), _num_hits(0), _only_count(only_count), _query_type(query_type) {} + : _hits(h), + _num_hits(0), + _only_count(only_count), + _low_op(PredicateType::GT), + _high_op(PredicateType::LT) { + if (query_value->get_query_category() == QueryCategory::RANGE_QUERY) { + auto range_query = reinterpret_cast<InvertedIndexRangeQueryI*>(query_value); + query_max = range_query->get_high_value(); + query_min = range_query->get_low_value(); + if (range_query->is_low_value_inclusive()) { + _low_op = PredicateType::GE; + } + if (range_query->is_high_value_inclusive()) { + _high_op = PredicateType::LE; + } + } else if (query_value->get_query_category() == QueryCategory::POINT_QUERY) { + auto point_query = reinterpret_cast<InvertedIndexPointQueryI*>(query_value); + for (const std::string& v : point_query->get_values()) { + query_points.emplace_back(v); + } + // =1 equals 1<= && >=1 + _low_op = PredicateType::GE; + _high_op = PredicateType::LE; + } +} + +bool InvertedIndexVisitor::_matches(const BinaryType& packed_value, const BinaryType& qmax, + const BinaryType& qmin) { + bool minInside = (_low_op == PredicateType::GE ? packed_value >= qmin : packed_value > qmin); + bool maxInside = (_high_op == PredicateType::LE ? packed_value <= qmax : packed_value < qmax); + if (minInside && maxInside) { + return true; + } + return false; +} bool InvertedIndexVisitor::matches(uint8_t* packed_value) { - for (int dim = 0; dim < _reader->num_data_dims_; dim++) { - int offset = dim * _reader->bytes_per_dim_; - if (_query_type == InvertedIndexQueryType::LESS_THAN_QUERY) { - if (lucene::util::FutureArrays::CompareUnsigned( - packed_value, offset, offset + _reader->bytes_per_dim_, - (const uint8_t*)query_max.c_str(), offset, - offset + _reader->bytes_per_dim_) >= 0) { - // Doc's value is too high, in this dimension - return false; - } - } else if (_query_type == InvertedIndexQueryType::GREATER_THAN_QUERY) { - if (lucene::util::FutureArrays::CompareUnsigned( - packed_value, offset, offset + _reader->bytes_per_dim_, - (const uint8_t*)query_min.c_str(), offset, - offset + _reader->bytes_per_dim_) <= 0) { - // Doc's value is too high, in this dimension + auto dim_match = [&](const BinaryType& qmax, const BinaryType& qmin) -> bool { + for (int dim = 0; dim < _reader->num_data_dims_; dim++) { + int offset = dim * _reader->bytes_per_dim_; + if (!_matches(BinaryType(packed_value + offset, _reader->bytes_per_dim_), + BinaryType(qmax._data + offset, _reader->bytes_per_dim_), + BinaryType(qmin._data + offset, _reader->bytes_per_dim_))) { return false; } - } else { - if (lucene::util::FutureArrays::CompareUnsigned( - packed_value, offset, offset + _reader->bytes_per_dim_, - (const uint8_t*)query_min.c_str(), offset, - offset + _reader->bytes_per_dim_) < 0) { - // Doc's value is too low, in this dimension - return false; - } - if (lucene::util::FutureArrays::CompareUnsigned( - packed_value, offset, offset + _reader->bytes_per_dim_, - (const uint8_t*)query_max.c_str(), offset, - offset + _reader->bytes_per_dim_) > 0) { - // Doc's value is too high, in this dimension - return false; + } + return true; + }; + if (!query_points.empty()) { + for (auto query_point : query_points) { + if (dim_match(query_point, query_point)) { + return true; } } + return false; + } else { + return dim_match(query_max, query_min); } Review Comment: warning: do not use 'else' after 'return' [readability-else-after-return] ```suggestion } return dim_match(query_max, query_min); ``` ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { + for (auto& term_doc : _term_docs) { + if (term_doc) { + _CLDELETE(term_doc); + } + } +} + +Status RangeQuery::add(const std::wstring& field_name, InvertedIndexRangeQueryI* query) { + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> lower_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> upper_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + + if (query->low_value_is_null() && query->high_value_is_null()) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "StringTypeInvertedIndexReader::handle_range_query error: both low_value and " + "high_value is null"); + } + auto search_low = query->get_low_value(); + if (!query->low_value_is_null()) { + std::wstring search_low_ws = StringUtil::string_to_wstring(search_low); + lower_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_low_ws.c_str())); + } else { + lower_term.reset(_CLNEW Term(field_name.c_str(), L"")); + } + auto search_high = query->get_high_value(); + if (!query->high_value_is_null()) { + std::wstring search_high_ws = StringUtil::string_to_wstring(search_high); + upper_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_high_ws.c_str())); + } + + auto _enumerator = _reader->terms(lower_term.get()); + Term* lastTerm = NULL; + try { + bool checkLower = false; + if (!query->is_low_value_inclusive()) // make adjustments to set to exclusive + checkLower = true; + + do { + lastTerm = _enumerator->term(); + if (lastTerm != NULL && lastTerm->field() == field_name) { + if (!checkLower || _tcscmp(lastTerm->text(), lower_term->text()) > 0) { + checkLower = false; + if (upper_term != NULL) { + int compare = _tcscmp(upper_term->text(), lastTerm->text()); + /* if beyond the upper term, or is exclusive and + * this is equal to the upper term, break out */ + if ((compare < 0) || (!query->is_high_value_inclusive() && compare == 0)) + break; Review Comment: warning: statement should be inside braces [readability-braces-around-statements] ```suggestion if ((compare < 0) || (!query->is_high_value_inclusive() && compare == 0)) { break; } ``` ########## be/src/olap/rowset/segment_v2/inverted_index/query/range_query.cpp: ########## @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "range_query.h" + +namespace doris { + +RangeQuery::RangeQuery(IndexReader* reader) : _reader(reader) {} + +RangeQuery::~RangeQuery() { + for (auto& term_doc : _term_docs) { + if (term_doc) { + _CLDELETE(term_doc); + } + } +} + +Status RangeQuery::add(const std::wstring& field_name, InvertedIndexRangeQueryI* query) { + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> lower_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + std::unique_ptr<lucene::index::Term, void (*)(lucene::index::Term*)> upper_term( + nullptr, [](lucene::index::Term* term) { _CLDECDELETE(term); }); + + if (query->low_value_is_null() && query->high_value_is_null()) { + return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "StringTypeInvertedIndexReader::handle_range_query error: both low_value and " + "high_value is null"); + } + auto search_low = query->get_low_value(); + if (!query->low_value_is_null()) { + std::wstring search_low_ws = StringUtil::string_to_wstring(search_low); + lower_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_low_ws.c_str())); + } else { + lower_term.reset(_CLNEW Term(field_name.c_str(), L"")); + } + auto search_high = query->get_high_value(); + if (!query->high_value_is_null()) { + std::wstring search_high_ws = StringUtil::string_to_wstring(search_high); + upper_term.reset(_CLNEW lucene::index::Term(field_name.c_str(), search_high_ws.c_str())); + } + + auto _enumerator = _reader->terms(lower_term.get()); + Term* lastTerm = NULL; + try { + bool checkLower = false; + if (!query->is_low_value_inclusive()) // make adjustments to set to exclusive + checkLower = true; + + do { + lastTerm = _enumerator->term(); + if (lastTerm != NULL && lastTerm->field() == field_name) { Review Comment: warning: use nullptr [modernize-use-nullptr] ```suggestion if (lastTerm != nullptr && lastTerm->field() == field_name) { ``` ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -300,24 +266,16 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run str_tokens += token; str_tokens += " "; } - - auto cache = InvertedIndexQueryCache::instance(); - InvertedIndexQueryCache::CacheKey cache_key; - cache_key.index_path = index_file_path; - cache_key.column_name = column_name; - cache_key.query_type = query_type; - //auto str_tokens = lucene_wcstoutf8string(wstr_tokens.c_str(), wstr_tokens.length()); - cache_key.value.swap(str_tokens); - InvertedIndexQueryCacheHandle cache_handle; std::shared_ptr<roaring::Roaring> term_match_bitmap = nullptr; - if (cache->lookup(cache_key, &cache_handle)) { - stats->inverted_index_query_cache_hit++; - term_match_bitmap = cache_handle.get_bitmap(); + InvertedIndexQueryCache::CacheKey cache_key {_file_full_path, column_name, query_type, + str_tokens}; + auto cache = InvertedIndexQueryCache::instance(); Review Comment: warning: 'auto cache' can be declared as 'auto *cache' [readability-qualified-auto] ```suggestion auto *cache = InvertedIndexQueryCache::instance(); ``` ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -578,69 +503,115 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, read_null_bitmap(&null_bitmap_cache_handle, index_searcher->getReader()->directory())); try { - if (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY || - query_type == InvertedIndexQueryType::MATCH_ALL_QUERY || - query_type == InvertedIndexQueryType::EQUAL_QUERY) { - SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); - index_searcher->_search(query.get(), [&result](DocRange* doc_range) { - if (doc_range->type_ == DocRangeType::kMany) { - result.addMany(doc_range->doc_many_size_, doc_range->doc_many->data()); - } else { - result.addRange(doc_range->doc_range.first, doc_range->doc_range.second); - } - }); - } else { - SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); - index_searcher->_search(query.get(), - [&result](const int32_t docid, const float_t /*score*/) { - // docid equal to rowid in segment - result.add(docid); - }); - } + SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); + range_query.search(result); + } catch (const CLuceneError& e) { - if (_is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses) { + if (e.number() == CL_ERR_TooManyClauses) { return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>( "range query term exceeds limits, try to downgrade from inverted index, column " "name:{}, search_str:{}", - column_name, search_str); + column_name, query->to_string()); } else { return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( "CLuceneError occured, error msg: {}, column name: {}, search_str: {}", - e.what(), column_name, search_str); + e.what(), column_name, query->to_string()); } } + // add to cache + std::shared_ptr<roaring::Roaring> range_query_bitmap = + std::make_shared<roaring::Roaring>(result); + range_query_bitmap->runOptimize(); + cache->insert(cache_key, range_query_bitmap, &cache_handler); + + bit_map->swap(result); + return Status::OK(); +} + +Status StringTypeInvertedIndexReader::handle_point_query(const std::string& column_name, + OlapReaderStatistics* stats, + InvertedIndexPointQueryI* query, + roaring::Roaring* bit_map) { + std::wstring column_name_ws = std::wstring(column_name.begin(), column_name.end()); + auto values = query->get_values(); + + InvertedIndexQueryCache::CacheKey cache_key {_file_full_path, column_name, + query->get_query_type(), query->to_string()}; + auto cache = InvertedIndexQueryCache::instance(); + InvertedIndexQueryCacheHandle cache_handler; + auto cache_status = handle_cache(cache, cache_key, &cache_handler, stats, bit_map); + if (cache_status.ok()) { + return Status::OK(); + } + + roaring::Roaring result; + InvertedIndexCacheHandle inverted_index_cache_handle; + static_cast<void>(InvertedIndexSearcherCache::instance()->get_index_searcher( + _fs, _file_dir, _file_name, &inverted_index_cache_handle, stats)); + auto index_searcher = inverted_index_cache_handle.get_index_searcher(); + DisjunctionQuery dis_query(index_searcher->getReader()); + dis_query.add(column_name_ws, values); + try { + SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); + dis_query.search(result); + } catch (const CLuceneError& e) { + return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( + "CLuceneError occured, error msg: {}, column name: {}, search_str: {}", e.what(), + column_name, query->to_string()); + } + + // try to reuse index_searcher's directory to read null_bitmap to cache + // to avoid open directory additionally for null_bitmap + InvertedIndexQueryCacheHandle null_bitmap_cache_handle; + static_cast<void>( + read_null_bitmap(&null_bitmap_cache_handle, index_searcher->getReader()->directory())); + // add to cache std::shared_ptr<roaring::Roaring> term_match_bitmap = std::make_shared<roaring::Roaring>(result); term_match_bitmap->runOptimize(); - cache->insert(cache_key, term_match_bitmap, &cache_handle); + cache->insert(cache_key, term_match_bitmap, &cache_handler); bit_map->swap(result); return Status::OK(); } +Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, Review Comment: warning: method 'query' can be made static [readability-convert-member-functions-to-static] ```suggestion static Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, ``` ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -827,47 +752,66 @@ InvertedIndexReaderType BkdIndexReader::type() { return InvertedIndexReaderType::BKD; } -InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, InvertedIndexQueryType query_type, +InvertedIndexVisitor::InvertedIndexVisitor(roaring::Roaring* h, InvertedIndexQueryBase* query_value, bool only_count) - : _hits(h), _num_hits(0), _only_count(only_count), _query_type(query_type) {} + : _hits(h), + _num_hits(0), + _only_count(only_count), + _low_op(PredicateType::GT), + _high_op(PredicateType::LT) { + if (query_value->get_query_category() == QueryCategory::RANGE_QUERY) { + auto range_query = reinterpret_cast<InvertedIndexRangeQueryI*>(query_value); + query_max = range_query->get_high_value(); + query_min = range_query->get_low_value(); + if (range_query->is_low_value_inclusive()) { + _low_op = PredicateType::GE; + } + if (range_query->is_high_value_inclusive()) { + _high_op = PredicateType::LE; + } + } else if (query_value->get_query_category() == QueryCategory::POINT_QUERY) { + auto point_query = reinterpret_cast<InvertedIndexPointQueryI*>(query_value); + for (const std::string& v : point_query->get_values()) { + query_points.emplace_back(v); + } + // =1 equals 1<= && >=1 + _low_op = PredicateType::GE; + _high_op = PredicateType::LE; + } +} + +bool InvertedIndexVisitor::_matches(const BinaryType& packed_value, const BinaryType& qmax, + const BinaryType& qmin) { + bool minInside = (_low_op == PredicateType::GE ? packed_value >= qmin : packed_value > qmin); + bool maxInside = (_high_op == PredicateType::LE ? packed_value <= qmax : packed_value < qmax); + if (minInside && maxInside) { + return true; Review Comment: warning: redundant boolean literal in conditional return statement [readability-simplify-boolean-expr] be/src/olap/rowset/segment_v2/inverted_index_reader.cpp:786: ```diff - if (minInside && maxInside) { - return true; - } - return false; + return minInside && maxInside; ``` ########## be/src/olap/rowset/segment_v2/inverted_index_reader.h: ########## @@ -207,17 +258,22 @@ class InvertedIndexVisitor : public lucene::util::bkd::bkd_reader::intersect_vis void visit(lucene::util::bkd::bkd_docid_set_iterator* iter, std::vector<uint8_t>& packed_value) override; bool matches(uint8_t* packed_value); + //bool _matches(uint8_t* packed_value, const uint8_t* query_max, const uint8_t* query_min); + bool _matches(const BinaryType& packed_value, const BinaryType& query_max, Review Comment: warning: function 'std::doris::segment_v2::InvertedIndexVisitor::_matches' has a definition with different parameter names [readability-inconsistent-declaration-parameter-name] ```cpp bool _matches(const BinaryType& packed_value, const BinaryType& query_max, ^ ``` <details> <summary>Additional context</summary> **be/src/olap/rowset/segment_v2/inverted_index_reader.cpp:782:** the definition seen here ```cpp bool InvertedIndexVisitor::_matches(const BinaryType& packed_value, const BinaryType& qmax, ^ ``` **be/src/olap/rowset/segment_v2/inverted_index_reader.h:261:** differing parameters are named here: ('query_max', 'query_min'), in definition: ('qmax', 'qmin') ```cpp bool _matches(const BinaryType& packed_value, const BinaryType& query_max, ^ ``` </details> ########## be/src/olap/rowset/segment_v2/inverted_index_reader.h: ########## @@ -72,18 +76,40 @@ class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexRea public: explicit InvertedIndexReader(io::FileSystemSPtr fs, const std::string& path, const TabletIndex* index_meta) - : _fs(fs), _path(path), _index_meta(*index_meta) {} + : _fs(fs), _path(path), _index_meta(*index_meta) { + io::Path io_path(_path); + auto index_dir = io_path.parent_path(); + auto index_file_name = InvertedIndexDescriptor::get_index_file_name(io_path.filename(), + index_meta->index_id()); + auto index_file_path = index_dir / index_file_name; + _file_full_path = index_file_path; + _file_name = index_file_name; + _file_dir = index_dir.c_str(); + } + virtual Status handle_cache(InvertedIndexQueryCache* cache, + const InvertedIndexQueryCache::CacheKey& cache_key, + InvertedIndexQueryCacheHandle* cache_handler, + OlapReaderStatistics* stats, roaring::Roaring* bit_map) { + if (cache->lookup(cache_key, cache_handler)) { + stats->inverted_index_query_cache_hit++; + SCOPED_RAW_TIMER(&stats->inverted_index_query_bitmap_copy_timer); + *bit_map = *cache_handler->get_bitmap(); + return Status::OK(); + } else { + stats->inverted_index_query_cache_miss++; + return Status::Error<ErrorCode::KEY_NOT_FOUND>("cache miss"); + } Review Comment: warning: do not use 'else' after 'return' [readability-else-after-return] ```suggestion } stats->inverted_index_query_cache_miss++; return Status::Error<ErrorCode::KEY_NOT_FOUND>("cache miss"); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org