xiaokang commented on code in PR #33225:
URL: https://github.com/apache/doris/pull/33225#discussion_r1549344161


##########
be/src/olap/rowset/segment_v2/inverted_index/query/query.h:
##########
@@ -34,10 +34,20 @@ CL_NS_USE(util)
 
 namespace doris::segment_v2 {
 
+struct InvertedIndexQueryInfo {
+    std::wstring file_name;
+    std::vector<std::string> terms;
+    int32_t slop = 0;
+};
+
 class Query {
 public:
     virtual ~Query() = default;
 
+    virtual void add(const InvertedIndexQueryInfo& query_info) {

Review Comment:
   InvertedIndexQueryInfo is only used to pass the additional slop argument, so
it would be simpler to add an `add` overload that takes one more `slop` argument.



##########
be/src/olap/rowset/segment_v2/inverted_index_reader.cpp:
##########
@@ -261,14 +263,23 @@ Status FullTextIndexReader::query(OlapReaderStatistics* 
stats, RuntimeState* run
               << search_str << "]";
 
     try {
-        std::vector<std::string> analyse_result;
+        InvertedIndexQueryInfo query_info;
         InvertedIndexQueryCache::CacheKey cache_key;
         auto index_file_key = 
_inverted_index_file_reader->get_index_file_key(&_index_meta);
 
         if (query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) {
             cache_key = {index_file_key, column_name, query_type, search_str};
-            analyse_result.emplace_back(search_str);
+            query_info.terms.emplace_back(search_str);
         } else {
+            if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) {
+                std::regex slopRegex(R"((.*[^ ])\s~(\d+)$)");

Review Comment:
   By the way, it may be more efficient to compare characters directly instead of using a general-purpose regex.



##########
be/src/olap/rowset/segment_v2/inverted_index/query/query.h:
##########
@@ -34,10 +34,20 @@ CL_NS_USE(util)
 
 namespace doris::segment_v2 {
 
+struct InvertedIndexQueryInfo {
+    std::wstring file_name;

Review Comment:
   Should this be `field_name` rather than `file_name`?



##########
be/src/olap/rowset/segment_v2/inverted_index_reader.cpp:
##########
@@ -261,14 +263,23 @@ Status FullTextIndexReader::query(OlapReaderStatistics* 
stats, RuntimeState* run
               << search_str << "]";
 
     try {
-        std::vector<std::string> analyse_result;
+        InvertedIndexQueryInfo query_info;
         InvertedIndexQueryCache::CacheKey cache_key;
         auto index_file_key = 
_inverted_index_file_reader->get_index_file_key(&_index_meta);
 
         if (query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) {
             cache_key = {index_file_key, column_name, query_type, search_str};
-            analyse_result.emplace_back(search_str);
+            query_info.terms.emplace_back(search_str);
         } else {
+            if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY) {
+                std::regex slopRegex(R"((.*[^ ])\s~(\d+)$)");

Review Comment:
   Use a precompiled regex pattern instead of constructing it for each query.



##########
be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.cpp:
##########
@@ -74,14 +88,18 @@ void PhraseQuery::add(const std::wstring& field_name, const 
std::vector<std::str
 }
 
 void PhraseQuery::search(roaring::Roaring& roaring) {
-    if (_lead1.isEmpty()) {
-        return;
-    }
-    if (_lead2.isEmpty()) {
-        search_by_bitmap(roaring);
-        return;
-    }
-    search_by_skiplist(roaring);
+    // if (_lead1.isEmpty()) {
+    //     return;
+    // }
+    // if (_lead2.isEmpty()) {
+    //     search_by_bitmap(roaring);
+    //     return;
+    // }
+    // search_by_skiplist(roaring);
+
+    _searcher->_search(_query.get(), [&roaring](const int32_t docid, const 
float_t /*score*/) {
+        roaring.add(docid);

Review Comment:
   Is the performance the same as before?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to