This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-c108335-hive-sql in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-c108335-hive-sql by this push: new 819b393d839 [feature](function) impl str_to_map (#49142) (#49916) 819b393d839 is described below commit 819b393d83975047f3add767fcbb2a252a1b7ccb Author: Socrates <suyit...@selectdb.com> AuthorDate: Thu Apr 10 04:03:20 2025 +0800 [feature](function) impl str_to_map (#49142) (#49916) patch to fix --- be/src/vec/functions/function_map.cpp | 83 ++++++-- .../expressions/functions/scalar/StrToMap.java | 8 +- .../string_functions/test_str_to_map.out | Bin 0 -> 17405 bytes .../string_functions/test_str_to_map.groovy | 232 +++++++++++++++++++++ 4 files changed, 295 insertions(+), 28 deletions(-) diff --git a/be/src/vec/functions/function_map.cpp b/be/src/vec/functions/function_map.cpp index 3d8b84bdf37..6a825c5e76f 100644 --- a/be/src/vec/functions/function_map.cpp +++ b/be/src/vec/functions/function_map.cpp @@ -296,8 +296,6 @@ public: String get_name() const override { return name; } - bool is_variadic() const override { return true; } - size_t get_number_of_arguments() const override { return 3; } DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { @@ -309,29 +307,75 @@ public: uint32_t result, size_t input_rows_count) const override { DCHECK(arguments.size() == 3); + bool cols_const[2]; + ColumnPtr cols[2]; + for (size_t i = 0; i < 2; ++i) { + cols_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); + } + // convert to full column if necessary + default_preprocess_parameter_columns(cols, cols_const, {0, 1}, block, arguments); + const auto& [col3, col3_const] = + unpack_if_const(block.get_by_position(arguments[2]).column); + + const auto& str_column = assert_cast<const ColumnString*>(cols[0].get()); + const auto& pair_delim_column = assert_cast<const ColumnString*>(cols[1].get()); + const auto& kv_delim_column = assert_cast<const ColumnString*>(col3.get()); + + ColumnPtr result_col; + if (cols_const[0] && cols_const[1]) { + result_col = execute_vector<true, false>(input_rows_count, *str_column, + *pair_delim_column, *kv_delim_column); + } else if (col3_const) { + result_col = execute_vector<false, true>(input_rows_count, *str_column, + *pair_delim_column, *kv_delim_column); + } else { + result_col = execute_vector<false, false>(input_rows_count, *str_column, + *pair_delim_column, *kv_delim_column); + } + + block.replace_by_position(result, std::move(result_col)); + + return Status::OK(); + } + +private: + template <bool is_str_and_pair_delim_const, bool is_kv_delim_const> + static ColumnPtr execute_vector(const size_t input_rows_count, const ColumnString& str_col, + const ColumnString& pair_delim_col, + const ColumnString& kv_delim_col) { // map keys column auto result_col_map_keys_data = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()); + result_col_map_keys_data->reserve(input_rows_count); // map values column auto result_col_map_vals_data = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()); + result_col_map_vals_data->reserve(input_rows_count); // map offsets column auto result_col_map_offsets = ColumnUInt64::create(); - - auto& str_col = block.get_by_position(arguments[0]).column; - auto& pair_delim_col = block.get_by_position(arguments[1]).column; - auto& kv_delim_col = block.get_by_position(arguments[2]).column; - - const auto* str_column = assert_cast<const ColumnString*>(str_col.get()); - const auto* pair_delim_column = assert_cast<const ColumnString*>(pair_delim_col.get()); - const auto* kv_delim_column = assert_cast<const ColumnString*>(kv_delim_col.get()); + result_col_map_offsets->reserve(input_rows_count); + + std::vector<std::string_view> kvs; + std::string_view kv_delim; + if constexpr (is_str_and_pair_delim_const) { + auto str = str_col.get_data_at(0).to_string_view(); + auto pair_delim = pair_delim_col.get_data_at(0).to_string_view(); + kvs = split_pair_by_delim(str, pair_delim); + } + if constexpr (is_kv_delim_const) { + kv_delim = kv_delim_col.get_data_at(0).to_string_view(); + } for (size_t i = 0; i < input_rows_count; ++i) { - const auto str = str_column->get_data_at(i).to_string_view(); - const auto pair_delim = pair_delim_column->get_data_at(i).to_string_view(); - const auto kv_delim = kv_delim_column->get_data_at(i).to_string_view(); + if constexpr (!is_str_and_pair_delim_const) { + auto str = str_col.get_data_at(i).to_string_view(); + auto pair_delim = pair_delim_col.get_data_at(i).to_string_view(); + kvs = split_pair_by_delim(str, pair_delim); + } + if constexpr (!is_kv_delim_const) { + kv_delim = kv_delim_col.get_data_at(i).to_string_view(); + } - auto kvs = split_pair_by_delim(str, pair_delim); for (const auto& kv : kvs) { auto kv_parts = split_kv_by_delim(kv, kv_delim); if (kv_parts.size() == 2) { @@ -345,16 +389,11 @@ public: result_col_map_offsets->insert_value(result_col_map_keys_data->size()); } - auto result_col = ColumnMap::create(std::move(result_col_map_keys_data), - std::move(result_col_map_vals_data), - std::move(result_col_map_offsets)); - - block.replace_by_position(result, std::move(result_col)); - - return Status::OK(); + return ColumnMap::create(std::move(result_col_map_keys_data), + std::move(result_col_map_vals_data), + std::move(result_col_map_offsets)); } -private: static std::vector<std::string_view> split_pair_by_delim(const std::string_view& str, const std::string_view& delim) { if (str.empty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java index d31f76cdefd..89df45d01c0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java @@ -20,6 +20,7 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.MapType; @@ -44,7 +45,7 @@ import java.util.List; * Both pairDelim and keyValueDelim are treated as regular expressions. */ public class StrToMap extends ScalarFunction - implements ExplicitlyCastableSignature { + implements ExplicitlyCastableSignature, PropagateNullable { public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( FunctionSignature.ret(MapType.of(StringType.INSTANCE, StringType.INSTANCE)) @@ -82,11 +83,6 @@ public class StrToMap extends ScalarFunction super("str_to_map", arg0, arg1, arg2); } - @Override - public boolean nullable() { - return false; - } - /** * withChildren. */ diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out b/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out new file mode 100644 index 00000000000..6dd44129806 Binary files /dev/null and b/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out differ diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_str_to_map.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_str_to_map.groovy new file mode 100644 index 00000000000..e11e2310b73 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_str_to_map.groovy @@ -0,0 +1,232 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_str_to_map") { + sql "drop table if exists str_to_map_args;" + sql """ + create table str_to_map_args ( + k0 int, + map_str_not_null string not null, + map_str_null string null, + key_delim_not_null string not null, + key_delim_null string null, + value_delim_not_null string not null, + value_delim_null string null + ) + DISTRIBUTED BY HASH(k0) + PROPERTIES + ( + "replication_num" = "1" + ); + """ + + // Test empty table with different nullable combinations + order_qt_all_not_null "select str_to_map(map_str_not_null, key_delim_not_null, value_delim_not_null) from str_to_map_args" + + order_qt_all_args_null "select str_to_map(map_str_null, key_delim_null, value_delim_null) from str_to_map_args" + + order_qt_partial_null "select str_to_map(map_str_not_null, key_delim_null, value_delim_null) from str_to_map_args" + + order_qt_nullable_no_null "select str_to_map(nullable(map_str_not_null), nullable(key_delim_not_null), nullable(value_delim_not_null)) from str_to_map_args" + + sql ''' + insert into str_to_map_args values + (1, 'a:1,b:2,c:3', 'a:1,b:2,c:3', ',', ',', ':', ':'), + (2, '', '', ',', ',', ':', ':'), -- Empty string test + (3, 'a:1', 'a:1', ',', ',', ':', ':'), -- Single key-value pair + (4, 'a:1,b:2,b:3', 'a:1,b:2,b:3', ',', ',', ':', ':'), -- Duplicate keys + (5, 'a:,b:,c:', 'a:,b:,c:', ',', ',', ':', ':'), -- Empty values + (6, ':1,:2,:3', ':1,:2,:3', ',', ',', ':', ':'), -- Empty keys + (7, 'a=1;b=2;c=3', 'a=1;b=2;c=3', ';', ';', '=', '='), -- Different delimiters + (8, '中文:值,英文:value', '中文:值,英文:value', ',', ',', ':', ':'), -- Unicode characters + (9, 'special@#:123,chars!:456', 'special@#:123,chars!:456', ',', ',', ':', ':'), -- Special characters in keys + (10, 'a:123!@#,b:456$%^', 'a:123!@#,b:456$%^', ',', ',', ':', ':'), -- Special characters in values + (11, 'verylongkey:verylongvalue,anotherlongkey:anotherlongvalue', 'verylongkey:verylongvalue,anotherlongkey:anotherlongvalue', ',', ',', ':', ':'), -- Long strings + (12, 'a::1,b::2', 'a::1,b::2', ',', ',', '::', '::'), -- Multi-character delimiter + (13, 'a:1\nb:2\nc:3', 'a:1\nb:2\nc:3', '\n', '\n', ':', ':'), -- Newline as delimiter + (14, 'a:1\tb:2\tc:3', 'a:1\tb:2\tc:3', '\t', '\t', ':', ':'), -- Tab as delimiter + (15, ' a : 1 , b : 2 ', ' a : 1 , b : 2 ', ',', ',', ':', ':') -- Spaces in string + ''' + + // Test different nullable combinations with data + order_qt_all_not_null_data """ + select str_to_map(map_str_not_null, key_delim_not_null, value_delim_not_null) + from str_to_map_args + order by k0; + """ + + order_qt_all_args_null_data """ + select str_to_map(map_str_null, key_delim_null, value_delim_null) + from str_to_map_args + order by k0; + """ + + order_qt_partial_null_data """ + select str_to_map(map_str_not_null, key_delim_null, value_delim_null) + from str_to_map_args + order by k0; + """ + + order_qt_nullable_no_null_data """ + select str_to_map(nullable(map_str_not_null), nullable(key_delim_not_null), nullable(value_delim_not_null)) + from str_to_map_args + order by k0; + """ + + // Test mixed nullable combinations + order_qt_mixed_null_1 """ + select str_to_map(map_str_null, key_delim_not_null, value_delim_not_null) + from str_to_map_args + order by k0; + """ + + order_qt_mixed_null_2 """ + select str_to_map(map_str_not_null, key_delim_null, value_delim_not_null) + from str_to_map_args + order by k0; + """ + + order_qt_mixed_null_3 """ + select str_to_map(map_str_not_null, key_delim_not_null, value_delim_null) + from str_to_map_args + order by k0; + """ + + // Test with constant null values + order_qt_const_null_1 """ + select str_to_map(null, key_delim_not_null, value_delim_not_null) + from str_to_map_args + order by k0; + """ + + order_qt_const_null_2 """ + select str_to_map(map_str_not_null, null, value_delim_not_null) + from str_to_map_args + order by k0; + """ + + order_qt_const_null_3 """ + select str_to_map(map_str_not_null, key_delim_not_null, null) + from str_to_map_args + order by k0; + """ + + /// consts. most by BE-UT + // Test const string with column delimiters + order_qt_const_str """ + select str_to_map('a:1,b:2', key_delim_not_null, value_delim_not_null) + from str_to_map_args order by k0 + """ + + // Test column string with const delimiters + order_qt_const_delims """ + select str_to_map(map_str_not_null, ',', ':') + from str_to_map_args order by k0 + """ + + // Test const string with one const delimiter and one column delimiter + order_qt_mixed_const1 """ + select str_to_map('x=1;y=2', ';', value_delim_not_null) + from str_to_map_args order by k0 + """ + + order_qt_mixed_const2 """ + select str_to_map('p-1|q-2', key_delim_not_null, '-') + from str_to_map_args order by k0 + """ + + // Test all const non-null arguments + order_qt_all_const """ + select str_to_map('a=1|b=2', '|', '=') + from str_to_map_args order by k0 + """ + + // Test const string with nullable column delimiters + order_qt_const_str_null_delims """ + select str_to_map('m:1,n:2', key_delim_null, value_delim_null) + from str_to_map_args order by k0 + """ + + // Test nullable column string with const delimiters + order_qt_null_str_const_delims ''' + select str_to_map(map_str_null, '#', '$') + from str_to_map_args order by k0 + ''' + + // Test basic str_to_map functionality with all parameters + qt_basic_1 "select str_to_map('a:1,b:2,c:3', ',', ':');" + qt_basic_2 "select str_to_map('key1=val1;key2=val2', ';', '=');" + qt_basic_3 "select str_to_map('x-1|y-2|z-3', '|', '-');" + + // Test with default parameters (omitting both delimiters) + // Default pair delimiter is ',' and key-value delimiter is ':' + qt_default_both_1 "select str_to_map('a:1,b:2,c:3');" + qt_default_both_2 "select str_to_map('key1:value1,key2:value2');" + qt_default_both_3 "select str_to_map('x:1,y:2,z:');" + qt_default_both_4 "select str_to_map('');" + + // Test with default key-value delimiter (omitting last parameter) + // Default key-value delimiter is ':' + qt_default_value_1 "select str_to_map('a:1;b:2;c:3', ';');" + qt_default_value_2 "select str_to_map('key:val|foo:bar', '|');" + qt_default_value_3 "select str_to_map('x:1#y:2#z:3', '#');" + qt_default_value_4 "select str_to_map('a:1...b:2...c:3', '...');" + + // Test empty string cases + qt_empty_1 "select str_to_map('');" + qt_empty_2 "select str_to_map('a:1,,b:2');" + qt_empty_3 "select str_to_map('a:,b:2,c:');" + qt_empty_4 "select str_to_map(',,,');" + + // Test missing key-value delimiter + qt_missing_value_1 "select str_to_map('a,b:2,c');" + qt_missing_value_2 "select str_to_map('val1,val2,val3');" + qt_missing_value_3 "select str_to_map('key1,key2:val2,key3');" + + // Test with special characters + qt_special_1 "select str_to_map('\ta:1\n,\tb:2\n');" + qt_special_2 "select str_to_map('a\\nb:1,c\\td:2');" + qt_special_3 "select str_to_map('key1:value1,key2:value2', ',', ':');" + + // Test with spaces + qt_spaces_1 "select str_to_map('a : 1, b : 2');" + qt_spaces_2 "select str_to_map(' a:1 , b:2 ');" + qt_spaces_3 "select str_to_map(' a:1, b:2 ');" + qt_spaces_4 "select str_to_map(' ');" + + // Test with Unicode characters + qt_unicode_1 "select str_to_map('键1:值1,键2:值2');" + qt_unicode_2 "select str_to_map('标题①:内容①,标题②:内容②');" + qt_unicode_3 "select str_to_map('🔑:🔒,📝:📖');" + qt_unicode_4 "select str_to_map('あ:い,う:え');" + + // Test with duplicate keys + qt_dup_1 "select str_to_map('a:1,b:2,a:3');" + qt_dup_2 "select str_to_map('key:val1,key:val2,key:val3');" + qt_dup_3 "select str_to_map('a:1,a:,a:3');" + + // Test edge cases + qt_edge_1 "select str_to_map('a:1:2,b:3:4');" + qt_edge_2 "select str_to_map(':::');" + qt_edge_3 "select str_to_map('a:1:2');" + qt_edge_4 "select str_to_map('key::value');" + qt_edge_5 "select str_to_map(':');" + + // Test extremely long strings + qt_long_1 "select str_to_map(repeat('a:1,', 1000));" + qt_long_2 "select str_to_map(concat(repeat('key', 100), ':', repeat('value', 100)));" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org