xiaokang commented on code in PR #33225: URL: https://github.com/apache/doris/pull/33225#discussion_r1549344161
########## be/src/olap/rowset/segment_v2/inverted_index/query/query.h: ########## @@ -34,10 +34,20 @@ CL_NS_USE(util) namespace doris::segment_v2 { +struct InvertedIndexQueryInfo { + std::wstring file_name; + std::vector<std::string> terms; + int32_t slop = 0; +}; + class Query { public: virtual ~Query() = default; + virtual void add(const InvertedIndexQueryInfo& query_info) { Review Comment: InvertedIndexQueryInfo is only used to pass the additional slop argument, so it would be simpler to add an add function that takes one more slop argument. ########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -261,14 +263,23 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run << search_str << "]"; try { - std::vector<std::string> analyse_result; + InvertedIndexQueryInfo query_info; InvertedIndexQueryCache::CacheKey cache_key; auto index_file_key = _inverted_index_file_reader->get_index_file_key(&_index_meta); if (query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) { cache_key = {index_file_key, column_name, query_type, search_str}; - analyse_result.emplace_back(search_str); + query_info.terms.emplace_back(search_str); } else { + if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) { + std::regex slopRegex(R"((.*[^ ])\s~(\d+)$)"); Review Comment: By the way, it may be more efficient to do a character comparison instead of using a regular regex. ########## be/src/olap/rowset/segment_v2/inverted_index/query/query.h: ########## @@ -34,10 +34,20 @@ CL_NS_USE(util) namespace doris::segment_v2 { +struct InvertedIndexQueryInfo { + std::wstring file_name; Review Comment: field_name? 
########## be/src/olap/rowset/segment_v2/inverted_index_reader.cpp: ########## @@ -261,14 +263,23 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run << search_str << "]"; try { - std::vector<std::string> analyse_result; + InvertedIndexQueryInfo query_info; InvertedIndexQueryCache::CacheKey cache_key; auto index_file_key = _inverted_index_file_reader->get_index_file_key(&_index_meta); if (query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) { cache_key = {index_file_key, column_name, query_type, search_str}; - analyse_result.emplace_back(search_str); + query_info.terms.emplace_back(search_str); } else { + if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) { + std::regex slopRegex(R"((.*[^ ])\s~(\d+)$)"); Review Comment: Use a precompiled regex pattern instead of creating it for each query. ########## be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.cpp: ########## @@ -74,14 +88,18 @@ void PhraseQuery::add(const std::wstring& field_name, const std::vector<std::str } void PhraseQuery::search(roaring::Roaring& roaring) { - if (_lead1.isEmpty()) { - return; - } - if (_lead2.isEmpty()) { - search_by_bitmap(roaring); - return; - } - search_by_skiplist(roaring); + // if (_lead1.isEmpty()) { + // return; + // } + // if (_lead2.isEmpty()) { + // search_by_bitmap(roaring); + // return; + // } + // search_by_skiplist(roaring); + + _searcher->_search(_query.get(), [&roaring](const int32_t docid, const float_t /*score*/) { + roaring.add(docid); Review Comment: Is the performance the same as before? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org