This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 43c6428aea [Function](string) support sub_replace function (#13736) 43c6428aea is described below commit 43c6428aea0fc70c9c80de4d31ec379c13898924 Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Fri Oct 28 08:40:08 2022 +0800 [Function](string) support sub_replace function (#13736) * [Function](string) support sub_replace function * remove conf --- be/src/vec/functions/function_string.cpp | 2 + be/src/vec/functions/function_string.h | 135 +++++++++++++++++++++ .../sql-functions/string-functions/sub_replace.md | 53 ++++++++ docs/sidebars.json | 1 + .../sql-functions/string-functions/sub_replace.md | 52 ++++++++ gensrc/script/doris_builtins_functions.py | 5 +- .../string_functions/test_string_function.out | 6 + .../string_functions/test_string_function.groovy | 3 + 8 files changed, 256 insertions(+), 1 deletion(-) diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 7033eee2f8..f694d274ce 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -686,6 +686,8 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl>>(); factory.register_function<FunctionStringMd5AndSM3<SM3Sum>>(); factory.register_function<FunctionReplace>(); + factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>(); + factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>(); factory.register_alias(FunctionLeft::name, "strleft"); factory.register_alias(FunctionRight::name, "strright"); diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index d3b278a0af..c652f8deef 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -1630,4 +1630,139 @@ struct ReverseImpl { } }; +template <typename Impl> +class FunctionSubReplace 
: public IFunction { +public: + static constexpr auto name = "sub_replace"; + + static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); } + + String get_name() const override { return name; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared<DataTypeString>()); + } + + bool is_variadic() const override { return true; } + + DataTypes get_variadic_argument_types_impl() const override { + return Impl::get_variadic_argument_types(); + } + + size_t get_number_of_arguments() const override { + return get_variadic_argument_types_impl().size(); + } + + bool use_default_implementation_for_nulls() const override { return false; } + + bool use_default_implementation_for_constants() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + return Impl::execute_impl(context, block, arguments, result, input_rows_count); + } +}; + +struct SubReplaceImpl { + static Status replace_execute(Block& block, const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + auto res_column = ColumnString::create(); + auto result_column = assert_cast<ColumnString*>(res_column.get()); + auto args_null_map = ColumnUInt8::create(input_rows_count, 0); + ColumnPtr argument_columns[4]; + for (int i = 0; i < 4; ++i) { + argument_columns[i] = + block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); + if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) { + // Danger: Here must dispose the null map data first! 
Because + // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem + // of column nullable mem of null map + VectorizedUtils::update_null_map(args_null_map->get_data(), + nullable->get_null_map_data()); + argument_columns[i] = nullable->get_nested_column_ptr(); + } + } + + auto data_column = assert_cast<const ColumnString*>(argument_columns[0].get()); + auto mask_column = assert_cast<const ColumnString*>(argument_columns[1].get()); + auto start_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get()); + auto length_column = assert_cast<const ColumnVector<Int32>*>(argument_columns[3].get()); + + vector(data_column, mask_column, start_column->get_data(), length_column->get_data(), + args_null_map->get_data(), result_column, input_rows_count); + + block.get_by_position(result).column = + ColumnNullable::create(std::move(res_column), std::move(args_null_map)); + return Status::OK(); + } + +private: + static void vector(const ColumnString* data_column, const ColumnString* mask_column, + const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& length, + NullMap& args_null_map, ColumnString* result_column, + size_t input_rows_count) { + ColumnString::Chars& res_chars = result_column->get_chars(); + ColumnString::Offsets& res_offsets = result_column->get_offsets(); + for (size_t row = 0; row < input_rows_count; ++row) { + StringRef origin_str = data_column->get_data_at(row); + StringRef new_str = mask_column->get_data_at(row); + size_t origin_str_len = origin_str.size; + //input is null, start < 0, len < 0, str_size <= start. 
return NULL + if (args_null_map[row] || start[row] < 0 || length[row] < 0 || + origin_str_len <= start[row]) { + res_offsets.push_back(res_chars.size()); + args_null_map[row] = 1; + } else { + std::string_view replace_str = new_str.to_string_view(); + std::string result = origin_str.to_string(); + result.replace(start[row], length[row], replace_str); + result_column->insert_data(result.data(), result.length()); + } + } + } +}; + +struct SubReplaceThreeImpl { + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), + std::make_shared<DataTypeInt32>()}; + } + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + auto params = ColumnInt32::create(input_rows_count); + auto& strlen_data = params->get_data(); + + auto str_col = + block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); + if (auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) { + str_col = nullable->get_nested_column_ptr(); + } + auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets(); + + for (int i = 0; i < input_rows_count; ++i) { + strlen_data[i] = str_offset[i] - str_offset[i - 1]; + } + + block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"}); + ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2], + block.columns() - 1}; + return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count); + } +}; + +struct SubReplaceFourImpl { + static DataTypes get_variadic_argument_types() { + return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), + std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()}; + } + + static Status execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + return 
SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count); + } +}; + } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/sub_replace.md b/docs/en/docs/sql-manual/sql-functions/string-functions/sub_replace.md new file mode 100644 index 0000000000..38ae718851 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/sub_replace.md @@ -0,0 +1,53 @@ +--- +{ +"title": "sub_replace", +"language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## sub_replace +### Description +#### Syntax + +`VARCHAR sub_replace(VARCHAR str, VARCHAR new_str, INT start [, INT len])` + +Returns a copy of str in which the substring of length len starting at position start (counted from 0) is replaced by new_str. +Returns NULL when start or len is a negative integer, or when start is not less than the length of str. +The default value of len is the length of new_str. 
+ +### example + +``` +mysql> select sub_replace("this is origin str","NEW-STR",1); ++-------------------------------------------------+ +| sub_replace('this is origin str', 'NEW-STR', 1) | ++-------------------------------------------------+ +| tNEW-STRorigin str | ++-------------------------------------------------+ + +mysql> select sub_replace("doris","***",1,2); ++-----------------------------------+ +| sub_replace('doris', '***', 1, 2) | ++-----------------------------------+ +| d***is | ++-----------------------------------+ +``` +### keywords + SUB_REPLACE diff --git a/docs/sidebars.json b/docs/sidebars.json index b02c6d5ad8..c0360c2b5c 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -370,6 +370,7 @@ "sql-manual/sql-functions/string-functions/concat_ws", "sql-manual/sql-functions/string-functions/substr", "sql-manual/sql-functions/string-functions/substring", + "sql-manual/sql-functions/string-functions/sub_replace", "sql-manual/sql-functions/string-functions/append_trailing_char_if_absent", "sql-manual/sql-functions/string-functions/ends_with", "sql-manual/sql-functions/string-functions/starts_with", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/sub_replace.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/sub_replace.md new file mode 100644 index 0000000000..5f2a782cb0 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/sub_replace.md @@ -0,0 +1,52 @@ +--- +{ +"title": "sub_replace", +"language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## sub_replace +### description +#### Syntax + +`VARCHAR sub_replace(VARCHAR str, VARCHAR new_str, INT start [, INT len])` + +返回用new_str字符串替换str中从start开始长度为len的新字符串。 +其中start,len为负整数,返回NULL, 且len的默认值为new_str的长度。 + +### example + +``` +mysql> select sub_replace("this is origin str","NEW-STR",1); ++-------------------------------------------------+ +| sub_replace('this is origin str', 'NEW-STR', 1) | ++-------------------------------------------------+ +| tNEW-STRorigin str | ++-------------------------------------------------+ + +mysql> select sub_replace("doris","***",1,2); ++-----------------------------------+ +| sub_replace('doris', '***', 1, 2) | ++-----------------------------------+ +| d***is | ++-----------------------------------+ +``` +### keywords + SUB_REPLACE diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 5c14145853..ec88c654b9 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -2180,7 +2180,10 @@ visible_functions = [ [['split_part'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'], '_ZN5doris15StringFunctions10split_partEPN9doris_udf15FunctionContextERKNS1_9StringValES6_RKNS1_6IntValE', '', '', 'vec', 'ALWAYS_NULLABLE'], - [['extract_url_parameter'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],'','', '', 'vec', ''], + [['extract_url_parameter'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],'','', '', 'vec', ''], + + [['sub_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'],'','', '', 'vec', 'ALWAYS_NULLABLE'], + [['sub_replace'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT', 
'INT'],'','', '', 'vec', 'ALWAYS_NULLABLE'], # Longtext function [['substr', 'substring'], 'STRING', ['STRING', 'INT'], diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out index 1eaf68bbca..492999e714 100644 --- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out @@ -272,3 +272,9 @@ a -- !sql -- +-- !sql -- +tNEW-STRorigin str + +-- !sql -- +d***is + diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy index 2a107ef487..8894a4f5b0 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy @@ -139,4 +139,7 @@ suite("test_string_function") { qt_sql "select substr('a',-1,1);" qt_sql "select substr('a',-2,1);" qt_sql "select substr('a',-3,1);" + + qt_sql "select sub_replace(\"this is origin str\",\"NEW-STR\",1);" + qt_sql "select sub_replace(\"doris\",\"***\",1,2);" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org