This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new de4315c1c5 [feature](function) support `initcap` string function (#13193) de4315c1c5 is described below commit de4315c1c5d8b6f52c94e792abd12a58f7541fe6 Author: luozenglin <37725793+luozeng...@users.noreply.github.com> AuthorDate: Thu Oct 13 21:31:44 2022 +0800 [feature](function) support `initcap` string function (#13193) support `initcap` string function --- be/src/exprs/string_functions.cpp | 21 ++++++++++ be/src/exprs/string_functions.h | 2 + be/src/util/simd/vstring_function.h | 4 +- be/src/vec/functions/function_string.cpp | 39 ++++++++++++++++++ .../sql-functions/string-functions/initcap.md | 47 ++++++++++++++++++++++ docs/sidebars.json | 1 + .../sql-functions/string-functions/initcap.md | 46 +++++++++++++++++++++ gensrc/script/doris_builtins_functions.py | 2 + .../string_functions/test_string_function.out | 3 ++ .../string_functions/test_string_function.groovy | 2 + 10 files changed, 165 insertions(+), 2 deletions(-) diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp index 2f3abd2e8e..8c381f5d0c 100644 --- a/be/src/exprs/string_functions.cpp +++ b/be/src/exprs/string_functions.cpp @@ -350,6 +350,27 @@ StringVal StringFunctions::upper(FunctionContext* context, const StringVal& str) return result; } +StringVal StringFunctions::initcap(FunctionContext* context, const StringVal& str) { + if (str.is_null) { + return StringVal::null(); + } + StringVal result(context, str.len); + + simd::VStringFunctions::to_lower(str.ptr, str.len, result.ptr); + + bool need_capitalize = true; + for (int64_t i = 0; i < str.len; ++i) { + if (!::isalnum(result.ptr[i])) { + need_capitalize = true; + } else if (need_capitalize) { + result.ptr[i] = ::toupper(result.ptr[i]); + need_capitalize = false; + } + } + + return result; +} + StringVal StringFunctions::reverse(FunctionContext* context, const StringVal& str) { if (str.is_null) { return StringVal::null(); diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h index 1e5ecf8d9e..ac9cfee632 100644 --- a/be/src/exprs/string_functions.h +++ b/be/src/exprs/string_functions.h @@ -81,6 +81,8 @@ public: const doris_udf::StringVal& str); static doris_udf::StringVal upper(doris_udf::FunctionContext* context, const doris_udf::StringVal& str); + static doris_udf::StringVal initcap(doris_udf::FunctionContext* context, + const doris_udf::StringVal& str); static doris_udf::StringVal reverse(doris_udf::FunctionContext* context, const doris_udf::StringVal& str); static doris_udf::StringVal trim(doris_udf::FunctionContext* context, diff --git a/be/src/util/simd/vstring_function.h b/be/src/util/simd/vstring_function.h index 7749f82818..e627683d1a 100644 --- a/be/src/util/simd/vstring_function.h +++ b/be/src/util/simd/vstring_function.h @@ -188,7 +188,7 @@ public: } } - static void to_lower(uint8_t* src, int64_t len, uint8_t* dst) { + static void to_lower(const uint8_t* src, int64_t len, uint8_t* dst) { if (len <= 0) { return; } @@ -196,7 +196,7 @@ public: lowerUpper.transfer(src, src + len, dst); } - static void to_upper(uint8_t* src, int64_t len, uint8_t* dst) { + static void to_upper(const uint8_t* src, int64_t len, uint8_t* dst) { if (len <= 0) { return; } diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 9dd34195f5..8ab6066372 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -265,6 +265,42 @@ struct TransferImpl { } }; +// Capitalize first letter +struct NameToInitcap { + static constexpr auto name = "initcap"; +}; + +struct InitcapImpl { + static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, + ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { + size_t offset_size = offsets.size(); + res_offsets.resize(offsets.size()); + memcpy(res_offsets.data(), offsets.data(), offset_size * sizeof(offsets.data())); + + size_t data_length = data.size(); + res_data.resize(data_length); + simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data()); + + bool need_capitalize = true; + for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) { + auto end_index = res_offsets[offset_index]; + need_capitalize = true; + + for (size_t i = start_index; i < end_index; ++i) { + if (!::isalnum(res_data[i])) { + need_capitalize = true; + } else if (need_capitalize) { + res_data[i] = ::toupper(res_data[i]); + need_capitalize = false; + } + } + + start_index = end_index; + } + return Status::OK(); + } +}; + struct NameTrim { static constexpr auto name = "trim"; }; @@ -588,6 +624,8 @@ using FunctionToLower = FunctionStringToString<TransferImpl<::tolower>, NameToLo using FunctionToUpper = FunctionStringToString<TransferImpl<::toupper>, NameToUpper>; +using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>; + using FunctionLTrim = FunctionStringToString<TrimImpl<true, false>, NameLTrim>; using FunctionRTrim = FunctionStringToString<TrimImpl<false, true>, NameRTrim>; @@ -619,6 +657,7 @@ void register_function_string(SimpleFunctionFactory& factory) { factory.register_function<FunctionUnHex>(); factory.register_function<FunctionToLower>(); factory.register_function<FunctionToUpper>(); + factory.register_function<FunctionToInitcap>(); factory.register_function<FunctionLTrim>(); factory.register_function<FunctionRTrim>(); factory.register_function<FunctionTrim>(); diff --git a/docs/en/docs/sql-manual/sql-functions/string-functions/initcap.md b/docs/en/docs/sql-manual/sql-functions/string-functions/initcap.md new file mode 100644 index 0000000000..1c88aae5f9 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/string-functions/initcap.md @@ -0,0 +1,47 @@ +--- +{ + "title": "initcap", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## initcap +### description +#### Syntax + +`VARCHAR initcap(VARCHAR str)` + +Convert the first letter of each word to upper case and the rest to lower case. +Words are sequences of alphanumeric characters separated by non-alphanumeric characters. + +### example + +``` +mysql> select initcap('hello hello.,HELLO123HELlo'); ++---------------------------------------+ +| initcap('hello hello.,HELLO123HELlo') | ++---------------------------------------+ +| Hello Hello.,Hello123hello | ++---------------------------------------+ +``` +### keywords + INITCAP \ No newline at end of file diff --git a/docs/sidebars.json b/docs/sidebars.json index a3efe0dfc9..3de9e228f1 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -357,6 +357,7 @@ "sql-manual/sql-functions/string-functions/lcase", "sql-manual/sql-functions/string-functions/upper", "sql-manual/sql-functions/string-functions/ucase", + "sql-manual/sql-functions/string-functions/initcap", "sql-manual/sql-functions/string-functions/repeat", "sql-manual/sql-functions/string-functions/reverse", "sql-manual/sql-functions/string-functions/concat", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/initcap.md b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/initcap.md new file mode 100644 index 0000000000..cee32f2b08 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/string-functions/initcap.md @@ -0,0 +1,46 @@ +--- +{ + "title": "initcap", + "language": "zh-CN" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +## initcap +### description +#### Syntax + +`VARCHAR initcap(VARCHAR str)` + +将参数中包含的单词首字母大写,其余字母转为小写。单词是由非字母数字字符分隔的字母数字字符序列。 + +### example + +``` +mysql> select initcap('hello hello.,HELLO123HELlo'); ++---------------------------------------+ +| initcap('hello hello.,HELLO123HELlo') | ++---------------------------------------+ +| Hello Hello.,Hello123hello | ++---------------------------------------+ +``` +### keywords + INITCAP diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index f75c1ff680..585b268ad8 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -2030,6 +2030,8 @@ visible_functions = [ '_ZN5doris15StringFunctions5lowerEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['upper', 'ucase'], 'VARCHAR', ['VARCHAR'], '_ZN5doris15StringFunctions5upperEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], + [['initcap'], 'VARCHAR', ['VARCHAR'], + '_ZN5doris15StringFunctions7initcapEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['trim'], 'VARCHAR', ['VARCHAR'], '_ZN5doris15StringFunctions4trimEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''], [['ltrim'], 'VARCHAR', ['VARCHAR'], diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out index 4d6b141073..d0f2b8c71a 100644 --- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function.out @@ -134,6 +134,9 @@ abc123 -- !sql -- abc123 +-- !sql -- +Abc123abc Abc.Abc,?|Abc + -- !sql -- Hello diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy index ceb1989b10..80ef709bd3 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy @@ -77,6 +77,8 @@ suite("test_string_function") { qt_sql "SELECT lcase(\"AbC123\");" qt_sql "SELECT lower(\"AbC123\");" + qt_sql "SELECT initcap(\"AbC123abc abc.abc,?|abc\");" + qt_sql "select left(\"Hello doris\",5);" qt_sql "select right(\"Hello doris\",5);" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org