This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new ebe031c019d [fix](inverted index) Fix match_regexp to correctly handle empty string patterns (#40659)
ebe031c019d is described below

commit ebe031c019d13fd58da06fc82de1c62eb9cd049e
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Wed Sep 11 18:10:33 2024 +0800

    [fix](inverted index) Fix match_regexp to correctly handle empty string patterns (#40659)
    
    https://github.com/apache/doris/pull/39503
---
 be/src/vec/functions/match.cpp                     |  9 -------
 .../inverted_index_p0/test_index_match_regexp.out  |  3 +++
 .../data/inverted_index_p0/test_no_index_match.out |  3 +++
 .../test_index_match_regexp.groovy                 |  1 +
 .../inverted_index_p0/test_no_index_match.groovy   | 31 ++++++++++++----------
 5 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index d656c3f5f1d..5d87ed8efdd 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -407,15 +407,6 @@ Status FunctionMatchRegexp::execute_match(FunctionContext* 
context, const std::s
     VLOG_DEBUG << "begin to run FunctionMatchRegexp::execute_match, 
parser_type: "
                << 
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type);
 
-    if (match_query_str.empty()) {
-        VLOG_DEBUG << fmt::format(
-                "token parser result is empty for query, "
-                "please check your query: '{}' and index parser: '{}'",
-                match_query_str,
-                
inverted_index_parser_type_to_string(inverted_index_ctx->parser_type));
-        return Status::OK();
-    }
-
     const std::string& pattern = match_query_str;
 
     hs_database_t* database = nullptr;
diff --git a/regression-test/data/inverted_index_p0/test_index_match_regexp.out 
b/regression-test/data/inverted_index_p0/test_index_match_regexp.out
index f9a9caf6d74..fb5d23ad266 100644
--- a/regression-test/data/inverted_index_p0/test_index_match_regexp.out
+++ b/regression-test/data/inverted_index_p0/test_index_match_regexp.out
@@ -2,6 +2,9 @@
 -- !sql --
 1000
 
+-- !sql --
+1000
+
 -- !sql --
 54
 
diff --git a/regression-test/data/inverted_index_p0/test_no_index_match.out 
b/regression-test/data/inverted_index_p0/test_no_index_match.out
index 932dd55fc5a..ea3bd71bcd6 100644
--- a/regression-test/data/inverted_index_p0/test_no_index_match.out
+++ b/regression-test/data/inverted_index_p0/test_no_index_match.out
@@ -20,3 +20,6 @@
 -- !sql --
 0
 
+-- !sql --
+1000
+
diff --git 
a/regression-test/suites/inverted_index_p0/test_index_match_regexp.groovy 
b/regression-test/suites/inverted_index_p0/test_index_match_regexp.groovy
index 3458f086740..31863432707 100644
--- a/regression-test/suites/inverted_index_p0/test_index_match_regexp.groovy
+++ b/regression-test/suites/inverted_index_p0/test_index_match_regexp.groovy
@@ -80,6 +80,7 @@ suite("test_index_match_regexp", "p0"){
 
         sql "sync"
 
+        qt_sql """ select count() from test_index_match_regexp where request 
match_regexp ''; """
         qt_sql """ select count() from test_index_match_regexp where request 
match_regexp '^h'; """
         qt_sql """ select count() from test_index_match_regexp where request 
match_regexp '^team'; """
         qt_sql """ select count() from test_index_match_regexp where request 
match_regexp 's\$'; """
diff --git 
a/regression-test/suites/inverted_index_p0/test_no_index_match.groovy 
b/regression-test/suites/inverted_index_p0/test_no_index_match.groovy
index 60227b01168..7cace77da7e 100644
--- a/regression-test/suites/inverted_index_p0/test_no_index_match.groovy
+++ b/regression-test/suites/inverted_index_p0/test_no_index_match.groovy
@@ -18,7 +18,7 @@
 
 suite("test_no_index_match", "p0") {
     // define a sql table
-    def testTable_unique = "httplogs_unique"
+    def testTable = "test_no_index_match"
 
     def create_httplogs_unique_table = {testTablex ->
       // multi-line sql
@@ -77,35 +77,38 @@ suite("test_no_index_match", "p0") {
     }
 
     try {
-      sql "DROP TABLE IF EXISTS ${testTable_unique}"
-      create_httplogs_unique_table.call(testTable_unique)
-      load_httplogs_data.call(testTable_unique, 'httplogs_unique', 'true', 
'json', 'documents-1000.json')
+      sql "DROP TABLE IF EXISTS ${testTable}"
+      create_httplogs_unique_table.call(testTable)
+      load_httplogs_data.call(testTable, 'test_no_index_match', 'true', 
'json', 'documents-1000.json')
 
-      sql """ INSERT INTO ${testTable_unique} VALUES (1, '1', '', 1, 1); """
+      sql """ INSERT INTO ${testTable} VALUES (1, '1', '', 1, 1); """
 
       sql 'sync'
+      sql """ set enable_common_expr_pushdown = true """
 
       try {
-          qt_sql """ select count() from ${testTable_unique} where (request 
match_any 'hm bg');  """
-          qt_sql """ select count() from ${testTable_unique} where (request 
match_all 'hm bg');  """
-          qt_sql """ select count() from ${testTable_unique} where (request 
match_phrase 'hm bg');  """
-          qt_sql """ select count() from ${testTable_unique} where (request 
match_phrase_prefix 'hm b');  """
-          qt_sql """ select count() from ${testTable_unique} where (request 
match_regexp 'la');  """
+          qt_sql """ select count() from ${testTable} where (request match_any 
'hm bg');  """
+          qt_sql """ select count() from ${testTable} where (request match_all 
'hm bg');  """
+          qt_sql """ select count() from ${testTable} where (request 
match_phrase 'hm bg');  """
+          qt_sql """ select count() from ${testTable} where (request 
match_phrase_prefix 'hm b');  """
+          qt_sql """ select count() from ${testTable} where (request 
match_regexp 'la');  """
 
-          qt_sql """ select count() from ${testTable_unique} where (request 
match_phrase '欧冶工业品');  """
-          qt_sql """ select count() from ${testTable_unique} where (request 
match_phrase_prefix '欧冶工业品');  """
+          qt_sql """ select count() from ${testTable} where (request 
match_phrase '欧冶工业品');  """
+          qt_sql """ select count() from ${testTable} where (request 
match_phrase_prefix '欧冶工业品');  """
+
+          qt_sql """ select count() from ${testTable} where (request 
match_regexp '');  """
       } finally {
       }
 
       try {
-          sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) 
*/ count() from ${testTable_unique} where (request match_phrase 'hm bg');  """
+          sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) 
*/ count() from ${testTable} where (request match_phrase 'hm bg');  """
       } catch (Exception e) {
         log.info(e.getMessage());
         assertTrue(e.getMessage().contains("match_phrase not support 
execute_match"))
       }
 
       try {
-          sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) 
*/ count() from ${testTable_unique} where (request match_phrase_prefix 'hm b'); 
 """
+          sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) 
*/ count() from ${testTable} where (request match_phrase_prefix 'hm b');  """
       } catch (Exception e) {
         log.info(e.getMessage());
         assertTrue(e.getMessage().contains("match_phrase_prefix not support 
execute_match"))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to