This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0-beta in repository https://gitbox.apache.org/repos/asf/doris.git
commit c6a2bc900d66350664ca3bf377d65ecfd3e5160e
Author: airborne12 <[email protected]>
AuthorDate: Wed Jun 7 20:07:48 2023 +0800

    [Fix](inverted index) if range query exceeds CLucene limits, downgrade it from inverted index (#20528)

    CLucene may throw CL_ERR_TooManyClauses when a range query hits too many terms.
    In this situation, we have to downgrade from inverted index.
---
 be/src/common/status.h                               |  4 ++--
 .../olap/rowset/segment_v2/inverted_index_reader.cpp | 20 +++++++++++++++++---
 .../olap/rowset/segment_v2/inverted_index_reader.h   |  1 +
 be/src/olap/rowset/segment_v2/segment_iterator.cpp   |  4 ++--
 4 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/be/src/common/status.h b/be/src/common/status.h
index ba634df13c..146b4f34f7 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -262,7 +262,7 @@ E(INVERTED_INDEX_INVALID_PARAMETERS, -6000);
 E(INVERTED_INDEX_NOT_SUPPORTED, -6001);
 E(INVERTED_INDEX_CLUCENE_ERROR, -6002);
 E(INVERTED_INDEX_FILE_NOT_FOUND, -6003);
-E(INVERTED_INDEX_FILE_HIT_LIMIT, -6004);
+E(INVERTED_INDEX_BYPASS, -6004);
 E(INVERTED_INDEX_NO_TERMS, -6005);
 E(INVERTED_INDEX_RENAME_FILE_FAILED, -6006);
 E(INVERTED_INDEX_EVALUATE_SKIPPED, -6007);
@@ -293,7 +293,7 @@ constexpr bool capture_stacktrace() {
             && code != ErrorCode::INVERTED_INDEX_NOT_SUPPORTED
             && code != ErrorCode::INVERTED_INDEX_CLUCENE_ERROR
             && code != ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND
-            && code != ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT
+            && code != ErrorCode::INVERTED_INDEX_BYPASS
             && code != ErrorCode::INVERTED_INDEX_NO_TERMS
             && code != ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED
             && code != ErrorCode::META_KEY_NOT_FOUND
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index a97eae836f..ad0deeafb2 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -73,6 +73,13 @@
 namespace doris {
 namespace segment_v2 {
 
+bool InvertedIndexReader::_is_range_query(InvertedIndexQueryType query_type) {
+    return (query_type == InvertedIndexQueryType::GREATER_THAN_QUERY ||
+            query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY ||
+            query_type == InvertedIndexQueryType::LESS_THAN_QUERY ||
+            query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY);
+}
+
 bool InvertedIndexReader::_is_match_query(InvertedIndexQueryType query_type) {
     return (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY ||
             query_type == InvertedIndexQueryType::MATCH_ALL_QUERY ||
@@ -491,8 +498,15 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats,
             result.add(docid);
         });
     } catch (const CLuceneError& e) {
-        LOG(WARNING) << "CLuceneError occured, error msg: " << e.what();
-        return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
+        if (_is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses) {
+            LOG(WARNING) << "range query term exceeds limits, try to downgrade from inverted index,"
+                         << "column name:" << column_name << " search_str:" << search_str;
+            return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>();
+        } else {
+            LOG(WARNING) << "CLuceneError occured, error msg: " << e.what()
+                         << "column name:" << column_name << " search_str:" << search_str;
+            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>();
+        }
     }
 
     // add to cache
@@ -875,7 +889,7 @@ Status InvertedIndexIterator::read_from_inverted_index(const std::string& column
         if (hit_count > segment_num_rows * query_bkd_limit_percent / 100) {
             LOG(INFO) << "hit count: " << hit_count << ", bkd inverted reached limit "
                       << query_bkd_limit_percent << "%, segment num rows: " << segment_num_rows;
-            return Status::Error<ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT>();
+            return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>();
         }
     }
 
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index d68939fd5c..f9272d16fa 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -109,6 +109,7 @@ public:
                                       InvertedIndexCtx* inverted_index_ctx);
 
 protected:
+    bool _is_range_query(InvertedIndexQueryType query_type);
     bool _is_match_query(InvertedIndexQueryType query_type);
     friend class InvertedIndexIterator;
     io::FileSystemSPtr _fs;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 284ae71f1a..92717945b1 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -752,13 +752,13 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
 
 bool SegmentIterator::_downgrade_without_index(Status res, bool need_remaining) {
     if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND ||
-        res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT ||
+        res.code() == ErrorCode::INVERTED_INDEX_BYPASS ||
         res.code() == ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED ||
         (res.code() == ErrorCode::INVERTED_INDEX_NO_TERMS && need_remaining)) {
         // 1. INVERTED_INDEX_FILE_NOT_FOUND means index file has not been built,
         //    usually occurs when creating a new index, queries can be downgraded
         //    without index.
-        // 2. INVERTED_INDEX_FILE_HIT_LIMIT means the hit of condition by index
+        // 2. INVERTED_INDEX_BYPASS means the hit of condition by index
         //    has reached the optimal limit, downgrade without index query can
         //    improve query performance.
         // 3. INVERTED_INDEX_EVALUATE_SKIPPED means the inverted index is not

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
