zclllyybb commented on code in PR #33243:
URL: https://github.com/apache/doris/pull/33243#discussion_r1561985177


##########
be/src/vec/functions/function_string.h:
##########
@@ -3852,4 +3852,108 @@ class FunctionIntToChar : public IFunction {
 #endif
     }
 };
+
+class FunctionStrInsert : public IFunction {
+public:
+    static constexpr auto name = "str_insert";
+    static FunctionPtr create() { return 
std::make_shared<FunctionStrInsert>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 4; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override 
{
+        DCHECK_EQ(arguments.size(), 4);
+
+        bool col_const[4];
+        ColumnPtr argument_columns[4];
+        for (int i = 0; i < 4; ++i) {
+            std::tie(argument_columns[i], col_const[i]) =
+                    
unpack_if_const(block.get_by_position(arguments[i]).column);
+        }
+        const auto* col_origin = assert_cast<const 
ColumnString*>(argument_columns[0].get());
+
+        const auto* col_pos = assert_cast<const 
ColumnVector<Int32>*>(argument_columns[1].get())
+                                      ->get_data()
+                                      .data();
+        const auto* col_len = assert_cast<const 
ColumnVector<Int32>*>(argument_columns[2].get())
+                                      ->get_data()
+                                      .data();
+        const auto* col_insert = assert_cast<const 
ColumnString*>(argument_columns[3].get());
+
+        ColumnString::MutablePtr col_res = ColumnString::create();
+
+        if (col_const[1]) {
+            vector<true>(col_origin, col_pos, col_len, col_insert, col_res, 
col_const,
+                         input_rows_count);
+        } else {
+            vector<false>(col_origin, col_pos, col_len, col_insert, col_res, 
col_const,
+                          input_rows_count);
+        }
+
+        block.replace_by_position(result, std::move(col_res));
+        return Status::OK();
+    }
+
+private:
+    // get the new str size
+    static std::pair<bool, size_t> get_size(size_t& str_size, int& pos, int& 
len,
+                                            size_t& ins_size) {
+        if (pos > str_size || pos < 1) {
+            return {true, str_size};
+        }
+        if (len < 0 || pos + len - 1 >= str_size) {
+            len = str_size - pos + 1;
+            return {false, pos + ins_size - 1};
+        }
+        return {false, str_size - len + ins_size};
+    }
+
+    template <bool is_const>
+    void vector(const ColumnString* col_origin, int const* col_pos, int const* 
col_len,
+                const ColumnString* col_insert, ColumnString::MutablePtr& 
col_res,
+                bool col_const[4], size_t input_rows_count) const {
+        auto& col_res_chars = col_res->get_chars();
+        auto& col_res_offsets = col_res->get_offsets();
+        StringRef origin_str = col_origin->get_data_at(0);
+        StringRef insert_str = col_insert->get_data_at(0);
+        auto pos = col_pos[0];
+        auto len = col_len[0];
+        if constexpr (is_const) {
+            if (pos < 1 || (col_const[0] && pos > origin_str.size)) {
+                for (size_t i = 0; i < input_rows_count; i++) {
+                    origin_str = col_origin->get_data_at(index_check_const(i, 
col_const[0]));
+                    col_res->insert_data(origin_str.data, origin_str.size);
+                }
+                return;
+            }
+        }
+        for (size_t i = 0; i < input_rows_count; i++) {
+            origin_str = col_origin->get_data_at(index_check_const(i, 
col_const[0]));
+            pos = col_pos[index_check_const(i, col_const[1])];
+            len = col_len[index_check_const(i, col_const[2])];
+            insert_str = col_insert->get_data_at(index_check_const(i, 
col_const[3]));
+
+            if (auto [is_origin, offset] = get_size(origin_str.size, pos, len, 
insert_str.size);

Review Comment:
   Try to refactor this code into a branchless structure. As written, the 
branching will block auto-vectorization.



##########
be/src/vec/functions/function_string.h:
##########
@@ -3852,4 +3852,108 @@ class FunctionIntToChar : public IFunction {
 #endif
     }
 };
+
+class FunctionStrInsert : public IFunction {
+public:
+    static constexpr auto name = "str_insert";
+    static FunctionPtr create() { return 
std::make_shared<FunctionStrInsert>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 4; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override 
{
+        DCHECK_EQ(arguments.size(), 4);
+
+        bool col_const[4];
+        ColumnPtr argument_columns[4];
+        for (int i = 0; i < 4; ++i) {
+            std::tie(argument_columns[i], col_const[i]) =
+                    
unpack_if_const(block.get_by_position(arguments[i]).column);
+        }
+        const auto* col_origin = assert_cast<const 
ColumnString*>(argument_columns[0].get());
+
+        const auto* col_pos = assert_cast<const 
ColumnVector<Int32>*>(argument_columns[1].get())
+                                      ->get_data()
+                                      .data();
+        const auto* col_len = assert_cast<const 
ColumnVector<Int32>*>(argument_columns[2].get())
+                                      ->get_data()
+                                      .data();
+        const auto* col_insert = assert_cast<const 
ColumnString*>(argument_columns[3].get());
+
+        ColumnString::MutablePtr col_res = ColumnString::create();
+
+        if (col_const[1]) {
+            vector<true>(col_origin, col_pos, col_len, col_insert, col_res, 
col_const,
+                         input_rows_count);
+        } else {
+            vector<false>(col_origin, col_pos, col_len, col_insert, col_res, 
col_const,
+                          input_rows_count);
+        }
+
+        block.replace_by_position(result, std::move(col_res));
+        return Status::OK();
+    }
+
+private:
+    // get the new str size
+    static std::pair<bool, size_t> get_size(size_t& str_size, int& pos, int& 
len,
+                                            size_t& ins_size) {
+        if (pos > str_size || pos < 1) {
+            return {true, str_size};
+        }
+        if (len < 0 || pos + len - 1 >= str_size) {
+            len = str_size - pos + 1;
+            return {false, pos + ins_size - 1};
+        }
+        return {false, str_size - len + ins_size};
+    }
+
+    template <bool is_const>
+    void vector(const ColumnString* col_origin, int const* col_pos, int const* 
col_len,
+                const ColumnString* col_insert, ColumnString::MutablePtr& 
col_res,
+                bool col_const[4], size_t input_rows_count) const {
+        auto& col_res_chars = col_res->get_chars();
+        auto& col_res_offsets = col_res->get_offsets();
+        StringRef origin_str = col_origin->get_data_at(0);
+        StringRef insert_str = col_insert->get_data_at(0);
+        auto pos = col_pos[0];
+        auto len = col_len[0];
+        if constexpr (is_const) {
+            if (pos < 1 || (col_const[0] && pos > origin_str.size)) {
+                for (size_t i = 0; i < input_rows_count; i++) {
+                    origin_str = col_origin->get_data_at(index_check_const(i, 
col_const[0]));
+                    col_res->insert_data(origin_str.data, origin_str.size);
+                }
+                return;
+            }
+        }
+        for (size_t i = 0; i < input_rows_count; i++) {
+            origin_str = col_origin->get_data_at(index_check_const(i, 
col_const[0]));

Review Comment:
   After changing the dispatch to check `col_const[1]`, `col_const[2]`, and 
`col_const[3]`, don't use `index_check_const` here again. We should already 
know exactly which columns are constant at this point.



##########
be/src/vec/functions/function_string.h:
##########
@@ -3852,4 +3852,108 @@ class FunctionIntToChar : public IFunction {
 #endif
     }
 };
+
+class FunctionStrInsert : public IFunction {
+public:
+    static constexpr auto name = "str_insert";
+    static FunctionPtr create() { return 
std::make_shared<FunctionStrInsert>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 4; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) const override 
{
+        DCHECK_EQ(arguments.size(), 4);
+
+        bool col_const[4];
+        ColumnPtr argument_columns[4];
+        for (int i = 0; i < 4; ++i) {
+            std::tie(argument_columns[i], col_const[i]) =
+                    
unpack_if_const(block.get_by_position(arguments[i]).column);
+        }
+        const auto* col_origin = assert_cast<const 
ColumnString*>(argument_columns[0].get());
+
+        const auto* col_pos = assert_cast<const 
ColumnVector<Int32>*>(argument_columns[1].get())
+                                      ->get_data()
+                                      .data();
+        const auto* col_len = assert_cast<const 
ColumnVector<Int32>*>(argument_columns[2].get())
+                                      ->get_data()
+                                      .data();
+        const auto* col_insert = assert_cast<const 
ColumnString*>(argument_columns[3].get());
+
+        ColumnString::MutablePtr col_res = ColumnString::create();
+
+        if (col_const[1]) {

Review Comment:
   Check `col_const[1]`, `col_const[2]`, and `col_const[3]` here, not only 
`col_const[1]`. We should optimize for the case where these arguments are 
constant.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to