zhiqiang-hhhh commented on code in PR #30557:
URL: https://github.com/apache/doris/pull/30557#discussion_r1470662098


##########
be/src/vec/functions/like.cpp:
##########
@@ -39,26 +39,25 @@
 
 namespace doris::vectorized {
 // A regex to match any regex pattern is equivalent to a substring search.
-static const RE2 SUBSTRING_RE(
-        
"(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
+static const RE2 
SUBSTRING_RE(R"((?:\.\*)*([^\.\^\{\[\(\|\)\]\}\+\*\?\$\\]*)(?:\.\*)*)");

Review Comment:
   Does this mean that we have had a wrong regex here for a long time?



##########
be/src/vec/functions/like.cpp:
##########
@@ -320,6 +319,53 @@ Status FunctionLikeBase::hs_prepare(FunctionContext* 
context, const char* expres
     return Status::OK();
 }
 
+static Status execute_substring(const ColumnString::Chars& values,

Review Comment:
   It seems this PR just changed the position of this function?



##########
be/src/vec/functions/like.cpp:
##########
@@ -447,43 +445,39 @@ void FunctionLike::convert_like_pattern(LikeSearchState* 
state, const std::strin
     }
 

Review Comment:
   re_pattern->reserve(pattern.size() * 2);



##########
be/src/vec/functions/like.cpp:
##########
@@ -447,43 +445,39 @@ void FunctionLike::convert_like_pattern(LikeSearchState* 
state, const std::strin
     }
 
     // add ^ to pattern head to match line head
-    if (pattern.size() > 0 && pattern[0] != '%') {
+    if (!pattern.empty() && pattern[0] != '%') {
         re_pattern->append("^");
     }
 
     bool is_escaped = false;

Review Comment:
   Maybe we should have a separate class which does not check backslash
escapes, for better performance in the future.



##########
be/src/vec/functions/like.cpp:
##########
@@ -447,43 +445,39 @@ void FunctionLike::convert_like_pattern(LikeSearchState* 
state, const std::strin
     }
 
     // add ^ to pattern head to match line head
-    if (pattern.size() > 0 && pattern[0] != '%') {
+    if (!pattern.empty() && pattern[0] != '%') {
         re_pattern->append("^");
     }
 
     bool is_escaped = false;
-    for (size_t i = 0; i < pattern.size(); ++i) {
-        if (!is_escaped) {
-            switch (pattern[i]) {
+    for (char i : pattern) {
+        if (is_escaped) { // the last one is '/' or specific escape char
+            re_pattern->append(1, i);

Review Comment:
   what if `i` is a special character of hyperscan or re2?



##########
be/src/udf/udf.h:
##########
@@ -25,6 +25,7 @@
 #include <string>
 #include <vector>
 
+#include "runtime/runtime_state.h"

Review Comment:
   This include seems unnecessary.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to