github-actions[bot] commented on code in PR #30743:
URL: https://github.com/apache/doris/pull/30743#discussion_r1475591824


##########
be/src/vec/functions/function_string.h:
##########
@@ -1431,6 +1430,103 @@ class FunctionStringRepeat : public IFunction {
     String get_name() const override { return name; }
     size_t get_number_of_arguments() const override { return 2; }
 
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        return std::make_shared<DataTypeString>();
+    }
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override 
{
+        DCHECK_EQ(arguments.size(), 2);
+        auto res = ColumnString::create();
+
+        ColumnPtr argument_ptr[2];
+        argument_ptr[0] =
+                
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        argument_ptr[1] = block.get_by_position(arguments[1]).column;
+
+        if (auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) 
{
+            if (auto* col2 = 
check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
+                vector_vector(col1->get_chars(), col1->get_offsets(), 
col2->get_data(),
+                              res->get_chars(), res->get_offsets(),
+                              context->state()->repeat_max_num());
+                block.replace_by_position(result, std::move(res));
+                return Status::OK();
+            } else if (auto* col2_const = 
check_and_get_column<ColumnConst>(*argument_ptr[1])) {
+                
DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column()));
+                int repeat = 0;
+                repeat = std::min<int>(col2_const->get_int(0), 
context->state()->repeat_max_num());
+
+                if (repeat <= 0) {
+                    res->insert_many_defaults(input_rows_count);
+                } else {
+                    vector_const(col1->get_chars(), col1->get_offsets(), 
repeat, res->get_chars(),
+                                 res->get_offsets());
+                }
+                block.replace_by_position(result, std::move(res));
+                return Status::OK();
+            }
+        }
+
+        return Status::RuntimeError("repeat function get error param: {}, {}",
+                                    argument_ptr[0]->get_name(), 
argument_ptr[1]->get_name());
+    }
+
+    void vector_vector(const ColumnString::Chars& data, const 
ColumnString::Offsets& offsets,

Review Comment:
   warning: method 'vector_vector' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static void vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
   ```
   
   be/src/vec/functions/function_string.h:1474:
   ```diff
   -                        ColumnString::Offsets& res_offsets, const int repeat_max_num) const {
   +                        ColumnString::Offsets& res_offsets, const int repeat_max_num) {
   ```
   



##########
be/src/vec/functions/function_string.h:
##########
@@ -1431,6 +1430,103 @@
     String get_name() const override { return name; }
     size_t get_number_of_arguments() const override { return 2; }
 
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        return std::make_shared<DataTypeString>();
+    }
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override 
{
+        DCHECK_EQ(arguments.size(), 2);
+        auto res = ColumnString::create();
+
+        ColumnPtr argument_ptr[2];
+        argument_ptr[0] =
+                
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        argument_ptr[1] = block.get_by_position(arguments[1]).column;
+
+        if (auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) 
{
+            if (auto* col2 = 
check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
+                vector_vector(col1->get_chars(), col1->get_offsets(), 
col2->get_data(),
+                              res->get_chars(), res->get_offsets(),
+                              context->state()->repeat_max_num());
+                block.replace_by_position(result, std::move(res));
+                return Status::OK();
+            } else if (auto* col2_const = 
check_and_get_column<ColumnConst>(*argument_ptr[1])) {
+                
DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column()));
+                int repeat = 0;
+                repeat = std::min<int>(col2_const->get_int(0), 
context->state()->repeat_max_num());
+
+                if (repeat <= 0) {
+                    res->insert_many_defaults(input_rows_count);
+                } else {
+                    vector_const(col1->get_chars(), col1->get_offsets(), 
repeat, res->get_chars(),
+                                 res->get_offsets());
+                }
+                block.replace_by_position(result, std::move(res));
+                return Status::OK();
+            }
+        }
+
+        return Status::RuntimeError("repeat function get error param: {}, {}",
+                                    argument_ptr[0]->get_name(), 
argument_ptr[1]->get_name());
+    }
+
+    void vector_vector(const ColumnString::Chars& data, const 
ColumnString::Offsets& offsets,
+                       const ColumnInt32::Container& repeats, 
ColumnString::Chars& res_data,
+                       ColumnString::Offsets& res_offsets, const int 
repeat_max_num) const {
+        size_t input_row_size = offsets.size();
+
+        fmt::memory_buffer buffer;
+        res_offsets.resize(input_row_size);
+        for (ssize_t i = 0; i < input_row_size; ++i) {
+            buffer.clear();
+            const char* raw_str = reinterpret_cast<const 
char*>(&data[offsets[i - 1]]);
+            size_t size = offsets[i] - offsets[i - 1];
+            int repeat = 0;
+            repeat = std::min<int>(repeats[i], repeat_max_num);
+
+            if (repeat <= 0) {
+                StringOP::push_empty_string(i, res_data, res_offsets);
+            } else {
+                for (int j = 0; j < repeat; ++j) {
+                    buffer.append(raw_str, raw_str + size);
+                }
+                StringOP::push_value_string(std::string_view(buffer.data(), 
buffer.size()), i,
+                                            res_data, res_offsets);
+            }
+        }
+    }
+
+    // TODO: 1. use pmr::vector<char> replace fmt_buffer may speed up the code
+    //       2. abstract the `vector_vector` and `vector_const`
+    //       3. rethink we should use `DEFAULT_MAX_STRING_SIZE` to bigger here
+    void vector_const(const ColumnString::Chars& data, const 
ColumnString::Offsets& offsets,

Review Comment:
   warning: method 'vector_const' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
   ```
   
   be/src/vec/functions/function_string.h:1503:
   ```diff
   -                       ColumnString::Offsets& res_offsets) const {
   +                       ColumnString::Offsets& res_offsets) {
   ```
   



##########
be/src/vec/functions/function_string.h:
##########
@@ -2166,6 +2259,257 @@
     static constexpr auto name = "split_by_string";
 
     static FunctionPtr create() { return 
std::make_shared<FunctionSplitByString>(); }
+
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        DCHECK(is_string(arguments[0]))
+                << "first argument for function: " << name << " should be 
string"
+                << " and arguments[0] is " << arguments[0]->get_name();
+        DCHECK(is_string(arguments[1]))
+                << "second argument for function: " << name << " should be 
string"
+                << " and arguments[1] is " << arguments[1]->get_name();
+        return std::make_shared<DataTypeArray>(arguments[0]);
+    }
+
+    Status execute_impl(FunctionContext* /*context*/, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t /*input_rows_count*/) const 
override {
+        DCHECK_EQ(arguments.size(), 2);
+
+        const auto& [src_column, left_const] =
+                unpack_if_const(block.get_by_position(arguments[0]).column);
+        const auto& [right_column, right_const] =
+                unpack_if_const(block.get_by_position(arguments[1]).column);
+
+        DataTypePtr right_column_type = 
block.get_by_position(arguments[1]).type;
+        DataTypePtr src_column_type = block.get_by_position(arguments[0]).type;
+        auto dest_column_ptr = 
ColumnArray::create(src_column_type->create_column(),
+                                                   
ColumnArray::ColumnOffsets::create());
+
+        IColumn* dest_nested_column = &dest_column_ptr->get_data();
+        auto& dest_offsets = dest_column_ptr->get_offsets();
+        DCHECK(dest_nested_column != nullptr);
+        dest_nested_column->reserve(0);
+        dest_offsets.reserve(0);
+
+        const auto* col_left = 
check_and_get_column<ColumnString>(src_column.get());
+        if (!col_left) {
+            return Status::InternalError("Left operator of function {} can not 
be {}", get_name(),
+                                         src_column_type->get_name());
+        }
+
+        const auto* col_right = 
check_and_get_column<ColumnString>(right_column.get());
+        if (!col_right) {
+            return Status::InternalError("Right operator of function {} can 
not be {}", get_name(),
+                                         right_column_type->get_name());
+        }
+
+        // split_by_string(ColumnString, "xxx")
+        if (right_const) {
+            _execute_constant_delimiter(*col_left, col_right->get_data_at(0), 
*dest_nested_column,
+                                        dest_offsets);
+        } else if (left_const) {
+            // split_by_string("xxx", ColumnString)
+            _execute_constant_src_string(col_left->get_data_at(0), *col_right, 
*dest_nested_column,
+                                         dest_offsets);
+        } else {
+            // split_by_string(ColumnString, ColumnString)
+            _execute_vector(*col_left, *col_right, *dest_nested_column, 
dest_offsets);
+        }
+
+        block.replace_by_position(result, std::move(dest_column_ptr));
+
+        return Status::OK();
+    }
+
+private:
+    void _execute_constant_delimiter(const ColumnString& src_column_string,

Review Comment:
   warning: method '_execute_constant_delimiter' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static void _execute_constant_delimiter(const ColumnString& src_column_string,
   ```
   
   be/src/vec/functions/function_string.h:2331:
   ```diff
   -                                      ColumnArray::Offsets64& dest_offsets) const {
   +                                      ColumnArray::Offsets64& dest_offsets) {
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to