zclllyybb commented on code in PR #33243: URL: https://github.com/apache/doris/pull/33243#discussion_r1561985177
########## be/src/vec/functions/function_string.h: ########## @@ -3852,4 +3852,108 @@ class FunctionIntToChar : public IFunction { #endif } }; + +class FunctionStrInsert : public IFunction { +public: + static constexpr auto name = "str_insert"; + static FunctionPtr create() { return std::make_shared<FunctionStrInsert>(); } + String get_name() const override { return name; } + size_t get_number_of_arguments() const override { return 4; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared<DataTypeString>(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { + DCHECK_EQ(arguments.size(), 4); + + bool col_const[4]; + ColumnPtr argument_columns[4]; + for (int i = 0; i < 4; ++i) { + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); + } + const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get()); + + const auto* col_pos = assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get()) + ->get_data() + .data(); + const auto* col_len = assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get()) + ->get_data() + .data(); + const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get()); + + ColumnString::MutablePtr col_res = ColumnString::create(); + + if (col_const[1]) { + vector<true>(col_origin, col_pos, col_len, col_insert, col_res, col_const, + input_rows_count); + } else { + vector<false>(col_origin, col_pos, col_len, col_insert, col_res, col_const, + input_rows_count); + } + + block.replace_by_position(result, std::move(col_res)); + return Status::OK(); + } + +private: + // get the new str size + static std::pair<bool, size_t> get_size(size_t& str_size, int& pos, int& len, + size_t& ins_size) { + if (pos > str_size || pos < 1) { + return {true, str_size}; + } + if (len < 0 || pos + len - 1 
>= str_size) { + len = str_size - pos + 1; + return {false, pos + ins_size - 1}; + } + return {false, str_size - len + ins_size}; + } + + template <bool is_const> + void vector(const ColumnString* col_origin, int const* col_pos, int const* col_len, + const ColumnString* col_insert, ColumnString::MutablePtr& col_res, + bool col_const[4], size_t input_rows_count) const { + auto& col_res_chars = col_res->get_chars(); + auto& col_res_offsets = col_res->get_offsets(); + StringRef origin_str = col_origin->get_data_at(0); + StringRef insert_str = col_insert->get_data_at(0); + auto pos = col_pos[0]; + auto len = col_len[0]; + if constexpr (is_const) { + if (pos < 1 || (col_const[0] && pos > origin_str.size)) { + for (size_t i = 0; i < input_rows_count; i++) { + origin_str = col_origin->get_data_at(index_check_const(i, col_const[0])); + col_res->insert_data(origin_str.data, origin_str.size); + } + return; + } + } + for (size_t i = 0; i < input_rows_count; i++) { + origin_str = col_origin->get_data_at(index_check_const(i, col_const[0])); + pos = col_pos[index_check_const(i, col_const[1])]; + len = col_len[index_check_const(i, col_const[2])]; + insert_str = col_insert->get_data_at(index_check_const(i, col_const[3])); + + if (auto [is_origin, offset] = get_size(origin_str.size, pos, len, insert_str.size); Review Comment: Try to refactor this code into a branchless structure. 
As written, the branching will block auto-vectorization. ########## be/src/vec/functions/function_string.h: ########## @@ -3852,4 +3852,108 @@ class FunctionIntToChar : public IFunction { #endif } }; + +class FunctionStrInsert : public IFunction { +public: + static constexpr auto name = "str_insert"; + static FunctionPtr create() { return std::make_shared<FunctionStrInsert>(); } + String get_name() const override { return name; } + size_t get_number_of_arguments() const override { return 4; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared<DataTypeString>(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { + DCHECK_EQ(arguments.size(), 4); + + bool col_const[4]; + ColumnPtr argument_columns[4]; + for (int i = 0; i < 4; ++i) { + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); + } + const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get()); + + const auto* col_pos = assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get()) + ->get_data() + .data(); + const auto* col_len = assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get()) + ->get_data() + .data(); + const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get()); + + ColumnString::MutablePtr col_res = ColumnString::create(); + + if (col_const[1]) { + vector<true>(col_origin, col_pos, col_len, col_insert, col_res, col_const, + input_rows_count); + } else { + vector<false>(col_origin, col_pos, col_len, col_insert, col_res, col_const, + input_rows_count); + } + + block.replace_by_position(result, std::move(col_res)); + return Status::OK(); + } + +private: + // get the new str size + static std::pair<bool, size_t> get_size(size_t& str_size, int& pos, int& len, + size_t& ins_size) { + if (pos > str_size || pos < 1) { + return {true, str_size}; + 
} + if (len < 0 || pos + len - 1 >= str_size) { + len = str_size - pos + 1; + return {false, pos + ins_size - 1}; + } + return {false, str_size - len + ins_size}; + } + + template <bool is_const> + void vector(const ColumnString* col_origin, int const* col_pos, int const* col_len, + const ColumnString* col_insert, ColumnString::MutablePtr& col_res, + bool col_const[4], size_t input_rows_count) const { + auto& col_res_chars = col_res->get_chars(); + auto& col_res_offsets = col_res->get_offsets(); + StringRef origin_str = col_origin->get_data_at(0); + StringRef insert_str = col_insert->get_data_at(0); + auto pos = col_pos[0]; + auto len = col_len[0]; + if constexpr (is_const) { + if (pos < 1 || (col_const[0] && pos > origin_str.size)) { + for (size_t i = 0; i < input_rows_count; i++) { + origin_str = col_origin->get_data_at(index_check_const(i, col_const[0])); + col_res->insert_data(origin_str.data, origin_str.size); + } + return; + } + } + for (size_t i = 0; i < input_rows_count; i++) { + origin_str = col_origin->get_data_at(index_check_const(i, col_const[0])); Review Comment: After changing the dispatch to check col_const[1], col_const[2], and col_const[3], don't use `index_check_const` here again. 
We should know exactly what they are at this point. ########## be/src/vec/functions/function_string.h: ########## @@ -3852,4 +3852,108 @@ class FunctionIntToChar : public IFunction { #endif } }; + +class FunctionStrInsert : public IFunction { +public: + static constexpr auto name = "str_insert"; + static FunctionPtr create() { return std::make_shared<FunctionStrInsert>(); } + String get_name() const override { return name; } + size_t get_number_of_arguments() const override { return 4; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared<DataTypeString>(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) const override { + DCHECK_EQ(arguments.size(), 4); + + bool col_const[4]; + ColumnPtr argument_columns[4]; + for (int i = 0; i < 4; ++i) { + std::tie(argument_columns[i], col_const[i]) = + unpack_if_const(block.get_by_position(arguments[i]).column); + } + const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get()); + + const auto* col_pos = assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get()) + ->get_data() + .data(); + const auto* col_len = assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get()) + ->get_data() + .data(); + const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get()); + + ColumnString::MutablePtr col_res = ColumnString::create(); + + if (col_const[1]) { Review Comment: Check col_const[1], col_const[2], and col_const[3] here, not only col_const[1]. We should optimize for this situation. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org