Copilot commented on code in PR #50367:
URL: https://github.com/apache/doris/pull/50367#discussion_r2057299387


##########
be/src/vec/functions/function_regexp.cpp:
##########
@@ -286,100 +289,106 @@ struct RegexpExtractImpl {
 struct RegexpExtractAllImpl {
     static constexpr auto name = "regexp_extract_all";
 
-    size_t get_number_of_arguments() const { return 2; }
-
-    static void execute_impl(FunctionContext* context, ColumnPtr 
argument_columns[],
-                             size_t input_rows_count, ColumnString::Chars& 
result_data,
-                             ColumnString::Offsets& result_offset, NullMap& 
null_map) {
+    template <bool first_const, bool second_const, bool third_const>
+    static void execute_impl(FunctionContext* context, const ColumnPtr* 
argument_columns,
+                             size_t input_rows_count, ColumnArray::MutablePtr& 
result_column) {
         const auto* str_col = 
check_and_get_column<ColumnString>(argument_columns[0].get());
         const auto* pattern_col = 
check_and_get_column<ColumnString>(argument_columns[1].get());
-        for (int i = 0; i < input_rows_count; ++i) {
-            if (null_map[i]) {
-                StringOP::push_null_string(i, result_data, result_offset, 
null_map);
-                continue;
+        const auto* group_idx_col = 
check_and_get_column<ColumnInt32>(argument_columns[2].get());
+
+        auto& result_array_col = assert_cast<ColumnArray&>(*result_column);
+        if constexpr (second_const && third_const) {
+            auto* re = reinterpret_cast<re2::RE2*>(
+                    
context->get_function_state(FunctionContext::THREAD_LOCAL));
+            if (re != nullptr) {
+                auto group_idx = group_idx_col->get_int(0);
+
+                if (re->NumberOfCapturingGroups() < group_idx) {
+                    result_array_col.insert_many_defaults(input_rows_count);
+                    return;
+                }
             }
-            _execute_inner_loop<false>(context, str_col, pattern_col, 
result_data, result_offset,
-                                       null_map, i);
         }
-    }
 
-    static void execute_impl_const_args(FunctionContext* context, ColumnPtr 
argument_columns[],
-                                        size_t input_rows_count, 
ColumnString::Chars& result_data,
-                                        ColumnString::Offsets& result_offset, 
NullMap& null_map) {
-        const auto* str_col = 
check_and_get_column<ColumnString>(argument_columns[0].get());
-        const auto* pattern_col = 
check_and_get_column<ColumnString>(argument_columns[1].get());
+        auto& column_nullable = 
assert_cast<ColumnNullable&>(result_array_col.get_data());
+        auto& null_map = column_nullable.get_null_map_data();
+        auto& column_string = 
assert_cast<ColumnString&>(column_nullable.get_nested_column());
+        auto& offsets = result_array_col.get_offsets();
+
         for (int i = 0; i < input_rows_count; ++i) {
-            if (null_map[i]) {
-                StringOP::push_null_string(i, result_data, result_offset, 
null_map);
-                continue;
-            }
-            _execute_inner_loop<true>(context, str_col, pattern_col, 
result_data, result_offset,
-                                      null_map, i);
+            _execute_inner_loop<first_const, second_const, third_const>(
+                    context, str_col, pattern_col, group_idx_col, i, 
column_string, null_map,
+                    offsets);
         }
     }
-    template <bool Const>
+
+    template <bool first_const, bool second_const, bool third_const>
     static void _execute_inner_loop(FunctionContext* context, const 
ColumnString* str_col,
                                     const ColumnString* pattern_col,
-                                    ColumnString::Chars& result_data,
-                                    ColumnString::Offsets& result_offset, 
NullMap& null_map,
-                                    const size_t index_now) {
-        re2::RE2* re = reinterpret_cast<re2::RE2*>(
+                                    const ColumnInt32* group_idx_col, const 
size_t index_now,
+                                    ColumnString& result_string_column, 
NullMap& null_map,
+                                    ColumnArray::Offsets64& result_offsets) {
+        auto* re = reinterpret_cast<re2::RE2*>(
                 context->get_function_state(FunctionContext::THREAD_LOCAL));
         std::unique_ptr<re2::RE2> scoped_re;
+
         if (re == nullptr) {
             std::string error_str;
-            const auto& pattern = 
pattern_col->get_data_at(index_check_const(index_now, Const));
+            const auto& pattern =
+                    pattern_col->get_data_at(index_check_const(index_now, 
second_const));
             bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(), scoped_re);
             if (!st) {
                 context->add_warning(error_str.c_str());
-                StringOP::push_null_string(index_now, result_data, 
result_offset, null_map);
+                null_map.push_back(1);
+                result_string_column.insert_default();
+                result_offsets.emplace_back(result_offsets.back() + 1);
                 return;
             }
             re = scoped_re.get();
         }
-        if (re->NumberOfCapturingGroups() == 0) {
-            StringOP::push_empty_string(index_now, result_data, result_offset);
+
+        auto group_idx = 
group_idx_col->get_element(index_check_const(index_now, third_const));
+
+        if (re->NumberOfCapturingGroups() < group_idx || group_idx < 0) {
+            result_offsets.emplace_back(result_offsets.back());
             return;
         }
-        const auto& str = str_col->get_data_at(index_now);
-        int max_matches = 1 + re->NumberOfCapturingGroups();
+
+        const auto& str = str_col->get_data_at(index_check_const(index_now, 
first_const));
+        int max_matches = 1 + group_idx;
         std::vector<re2::StringPiece> res_matches;
         size_t pos = 0;
         while (pos < str.size) {
-            auto str_pos = str.data + pos;
+            const auto* str_pos = str.data + pos;
             auto str_size = str.size - pos;
             re2::StringPiece str_sp = re2::StringPiece(str_pos, str_size);
             std::vector<re2::StringPiece> matches(max_matches);
-            bool success =
-                    re->Match(str_sp, 0, str_size, re2::RE2::UNANCHORED, 
&matches[0], max_matches);
+            bool success = re->Match(str_sp, 0, str_size, 
re2::RE2::UNANCHORED, matches.data(),
+                                     max_matches);
             if (!success) {
-                StringOP::push_empty_string(index_now, result_data, 
result_offset);
                 break;
             }
+
             if (matches[0].empty()) {
-                StringOP::push_empty_string(index_now, result_data, 
result_offset);
                 pos += 1;
                 continue;
             }
-            res_matches.push_back(matches[1]);
+
+            res_matches.push_back(matches[group_idx]);
             auto offset = std::string(str_pos, 
str_size).find(std::string(matches[0].as_string()));
             pos += offset + matches[0].size();
         }
 
         if (res_matches.empty()) {
-            StringOP::push_empty_string(index_now, result_data, result_offset);
+            result_offsets.emplace_back(result_offsets.back());
             return;
         }
 
-        std::string res = "[";
-        for (int j = 0; j < res_matches.size(); ++j) {
-            res += "'" + res_matches[j].as_string() + "'";
-            if (j < res_matches.size() - 1) {
-                res += ",";
-            }
+        for (auto res_matche : res_matches) {
+            result_string_column.insert_data(res_matche.data(), 
res_matche.size());

Review Comment:
   [nitpick] Possible typo: the loop variable `res_matche` was probably meant 
to be `res_match`; renaming it would improve clarity.
   ```suggestion
           for (auto res_match : res_matches) {
               result_string_column.insert_data(res_match.data(), 
res_match.size());
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to