This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-c108335-hive-sql
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-c108335-hive-sql by 
this push:
     new 819b393d839 [feature](function) impl str_to_map (#49142) (#49916)
819b393d839 is described below

commit 819b393d83975047f3add767fcbb2a252a1b7ccb
Author: Socrates <suyit...@selectdb.com>
AuthorDate: Thu Apr 10 04:03:20 2025 +0800

    [feature](function) impl str_to_map (#49142) (#49916)
    
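    patch to fix str_to_map: handle constant argument columns in the BE,
    make StrToMap propagate nullability in the FE, and add regression tests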
---
 be/src/vec/functions/function_map.cpp              |  83 ++++++--
 .../expressions/functions/scalar/StrToMap.java     |   8 +-
 .../string_functions/test_str_to_map.out           | Bin 0 -> 17405 bytes
 .../string_functions/test_str_to_map.groovy        | 232 +++++++++++++++++++++
 4 files changed, 295 insertions(+), 28 deletions(-)

diff --git a/be/src/vec/functions/function_map.cpp 
b/be/src/vec/functions/function_map.cpp
index 3d8b84bdf37..6a825c5e76f 100644
--- a/be/src/vec/functions/function_map.cpp
+++ b/be/src/vec/functions/function_map.cpp
@@ -296,8 +296,6 @@ public:
 
     String get_name() const override { return name; }
 
-    bool is_variadic() const override { return true; }
-
     size_t get_number_of_arguments() const override { return 3; }
 
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
@@ -309,29 +307,75 @@ public:
                         uint32_t result, size_t input_rows_count) const 
override {
         DCHECK(arguments.size() == 3);
 
+        bool cols_const[2];
+        ColumnPtr cols[2];
+        for (size_t i = 0; i < 2; ++i) {
+            cols_const[i] = 
is_column_const(*block.get_by_position(arguments[i]).column);
+        }
+        // convert to full column if necessary
+        default_preprocess_parameter_columns(cols, cols_const, {0, 1}, block, 
arguments);
+        const auto& [col3, col3_const] =
+                unpack_if_const(block.get_by_position(arguments[2]).column);
+
+        const auto& str_column = assert_cast<const 
ColumnString*>(cols[0].get());
+        const auto& pair_delim_column = assert_cast<const 
ColumnString*>(cols[1].get());
+        const auto& kv_delim_column = assert_cast<const 
ColumnString*>(col3.get());
+
+        ColumnPtr result_col;
+        if (cols_const[0] && cols_const[1]) {
+            result_col = execute_vector<true, false>(input_rows_count, 
*str_column,
+                                                     *pair_delim_column, 
*kv_delim_column);
+        } else if (col3_const) {
+            result_col = execute_vector<false, true>(input_rows_count, 
*str_column,
+                                                     *pair_delim_column, 
*kv_delim_column);
+        } else {
+            result_col = execute_vector<false, false>(input_rows_count, 
*str_column,
+                                                      *pair_delim_column, 
*kv_delim_column);
+        }
+
+        block.replace_by_position(result, std::move(result_col));
+
+        return Status::OK();
+    }
+
+private:
+    template <bool is_str_and_pair_delim_const, bool is_kv_delim_const>
+    static ColumnPtr execute_vector(const size_t input_rows_count, const 
ColumnString& str_col,
+                                    const ColumnString& pair_delim_col,
+                                    const ColumnString& kv_delim_col) {
         // map keys column
         auto result_col_map_keys_data =
                 ColumnNullable::create(ColumnString::create(), 
ColumnUInt8::create());
+        result_col_map_keys_data->reserve(input_rows_count);
         // map values column
         auto result_col_map_vals_data =
                 ColumnNullable::create(ColumnString::create(), 
ColumnUInt8::create());
+        result_col_map_vals_data->reserve(input_rows_count);
         // map offsets column
         auto result_col_map_offsets = ColumnUInt64::create();
-
-        auto& str_col = block.get_by_position(arguments[0]).column;
-        auto& pair_delim_col = block.get_by_position(arguments[1]).column;
-        auto& kv_delim_col = block.get_by_position(arguments[2]).column;
-
-        const auto* str_column = assert_cast<const 
ColumnString*>(str_col.get());
-        const auto* pair_delim_column = assert_cast<const 
ColumnString*>(pair_delim_col.get());
-        const auto* kv_delim_column = assert_cast<const 
ColumnString*>(kv_delim_col.get());
+        result_col_map_offsets->reserve(input_rows_count);
+
+        std::vector<std::string_view> kvs;
+        std::string_view kv_delim;
+        if constexpr (is_str_and_pair_delim_const) {
+            auto str = str_col.get_data_at(0).to_string_view();
+            auto pair_delim = pair_delim_col.get_data_at(0).to_string_view();
+            kvs = split_pair_by_delim(str, pair_delim);
+        }
+        if constexpr (is_kv_delim_const) {
+            kv_delim = kv_delim_col.get_data_at(0).to_string_view();
+        }
 
         for (size_t i = 0; i < input_rows_count; ++i) {
-            const auto str = str_column->get_data_at(i).to_string_view();
-            const auto pair_delim = 
pair_delim_column->get_data_at(i).to_string_view();
-            const auto kv_delim = 
kv_delim_column->get_data_at(i).to_string_view();
+            if constexpr (!is_str_and_pair_delim_const) {
+                auto str = str_col.get_data_at(i).to_string_view();
+                auto pair_delim = 
pair_delim_col.get_data_at(i).to_string_view();
+                kvs = split_pair_by_delim(str, pair_delim);
+            }
+            if constexpr (!is_kv_delim_const) {
+                kv_delim = kv_delim_col.get_data_at(i).to_string_view();
+            }
 
-            auto kvs = split_pair_by_delim(str, pair_delim);
             for (const auto& kv : kvs) {
                 auto kv_parts = split_kv_by_delim(kv, kv_delim);
                 if (kv_parts.size() == 2) {
@@ -345,16 +389,11 @@ public:
             
result_col_map_offsets->insert_value(result_col_map_keys_data->size());
         }
 
-        auto result_col = 
ColumnMap::create(std::move(result_col_map_keys_data),
-                                            
std::move(result_col_map_vals_data),
-                                            std::move(result_col_map_offsets));
-
-        block.replace_by_position(result, std::move(result_col));
-
-        return Status::OK();
+        return ColumnMap::create(std::move(result_col_map_keys_data),
+                                 std::move(result_col_map_vals_data),
+                                 std::move(result_col_map_offsets));
     }
 
-private:
     static std::vector<std::string_view> split_pair_by_delim(const 
std::string_view& str,
                                                              const 
std::string_view& delim) {
         if (str.empty()) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java
index d31f76cdefd..89df45d01c0 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java
@@ -20,6 +20,7 @@ package 
org.apache.doris.nereids.trees.expressions.functions.scalar;
 import org.apache.doris.catalog.FunctionSignature;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import 
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
 import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.MapType;
@@ -44,7 +45,7 @@ import java.util.List;
  * Both pairDelim and keyValueDelim are treated as regular expressions.
  */
 public class StrToMap extends ScalarFunction
-        implements ExplicitlyCastableSignature {
+        implements ExplicitlyCastableSignature, PropagateNullable {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             FunctionSignature.ret(MapType.of(StringType.INSTANCE, 
StringType.INSTANCE))
@@ -82,11 +83,6 @@ public class StrToMap extends ScalarFunction
         super("str_to_map", arg0, arg1, arg2);
     }
 
-    @Override
-    public boolean nullable() {
-        return false;
-    }
-
     /**
      * withChildren.
      */
diff --git 
a/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
 
b/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
new file mode 100644
index 00000000000..6dd44129806
Binary files /dev/null and 
b/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
 differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/string_functions/test_str_to_map.groovy
 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_str_to_map.groovy
new file mode 100644
index 00000000000..e11e2310b73
--- /dev/null
+++ 
b/regression-test/suites/query_p0/sql_functions/string_functions/test_str_to_map.groovy
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_str_to_map") {
+    sql "drop table if exists str_to_map_args;"
+    sql """
+        create table str_to_map_args (
+            k0 int,
+            map_str_not_null string not null,
+            map_str_null string null,
+            key_delim_not_null string not null,
+            key_delim_null string null,
+            value_delim_not_null string not null,
+            value_delim_null string null
+        )
+        DISTRIBUTED BY HASH(k0)
+        PROPERTIES
+        (
+            "replication_num" = "1"
+        );
+    """
+
+    // Test empty table with different nullable combinations
+    order_qt_all_not_null "select str_to_map(map_str_not_null, 
key_delim_not_null, value_delim_not_null) from str_to_map_args"
+    
+    order_qt_all_args_null "select str_to_map(map_str_null, key_delim_null, 
value_delim_null) from str_to_map_args"
+    
+    order_qt_partial_null "select str_to_map(map_str_not_null, key_delim_null, 
value_delim_null) from str_to_map_args"
+    
+    order_qt_nullable_no_null "select str_to_map(nullable(map_str_not_null), 
nullable(key_delim_not_null), nullable(value_delim_not_null)) from 
str_to_map_args"
+
+    sql '''
+    insert into str_to_map_args values
+        (1, 'a:1,b:2,c:3', 'a:1,b:2,c:3', ',', ',', ':', ':'),
+        (2, '', '', ',', ',', ':', ':'), -- Empty string test
+        (3, 'a:1', 'a:1', ',', ',', ':', ':'), -- Single key-value pair
+        (4, 'a:1,b:2,b:3', 'a:1,b:2,b:3', ',', ',', ':', ':'), -- Duplicate 
keys
+        (5, 'a:,b:,c:', 'a:,b:,c:', ',', ',', ':', ':'), -- Empty values
+        (6, ':1,:2,:3', ':1,:2,:3', ',', ',', ':', ':'), -- Empty keys
+        (7, 'a=1;b=2;c=3', 'a=1;b=2;c=3', ';', ';', '=', '='), -- Different 
delimiters
+        (8, '中文:值,英文:value', '中文:值,英文:value', ',', ',', ':', ':'), -- Unicode 
characters
+        (9, 'special@#:123,chars!:456', 'special@#:123,chars!:456', ',', ',', 
':', ':'), -- Special characters in keys
+        (10, 'a:123!@#,b:456$%^', 'a:123!@#,b:456$%^', ',', ',', ':', ':'), -- 
Special characters in values
+        (11, 'verylongkey:verylongvalue,anotherlongkey:anotherlongvalue', 
'verylongkey:verylongvalue,anotherlongkey:anotherlongvalue', ',', ',', ':', 
':'), -- Long strings
+        (12, 'a::1,b::2', 'a::1,b::2', ',', ',', '::', '::'), -- 
Multi-character delimiter
+        (13, 'a:1\nb:2\nc:3', 'a:1\nb:2\nc:3', '\n', '\n', ':', ':'), -- 
Newline as delimiter
+        (14, 'a:1\tb:2\tc:3', 'a:1\tb:2\tc:3', '\t', '\t', ':', ':'), -- Tab 
as delimiter
+        (15, ' a : 1 , b : 2 ', ' a : 1 , b : 2 ', ',', ',', ':', ':') -- 
Spaces in string
+    '''
+
+    // Test different nullable combinations with data
+    order_qt_all_not_null_data """
+        select str_to_map(map_str_not_null, key_delim_not_null, 
value_delim_not_null) 
+        from str_to_map_args 
+        order by k0;
+    """
+
+    order_qt_all_args_null_data """
+        select str_to_map(map_str_null, key_delim_null, value_delim_null)
+        from str_to_map_args
+        order by k0;
+    """
+
+    order_qt_partial_null_data """
+        select str_to_map(map_str_not_null, key_delim_null, value_delim_null)
+        from str_to_map_args
+        order by k0;
+    """
+
+    order_qt_nullable_no_null_data """
+        select str_to_map(nullable(map_str_not_null), 
nullable(key_delim_not_null), nullable(value_delim_not_null))
+        from str_to_map_args
+        order by k0;
+    """
+
+    // Test mixed nullable combinations
+    order_qt_mixed_null_1 """
+        select str_to_map(map_str_null, key_delim_not_null, 
value_delim_not_null)
+        from str_to_map_args
+        order by k0;
+    """
+
+    order_qt_mixed_null_2 """
+        select str_to_map(map_str_not_null, key_delim_null, 
value_delim_not_null)
+        from str_to_map_args
+        order by k0;
+    """
+
+    order_qt_mixed_null_3 """
+        select str_to_map(map_str_not_null, key_delim_not_null, 
value_delim_null)
+        from str_to_map_args
+        order by k0;
+    """
+
+    // Test with constant null values
+    order_qt_const_null_1 """
+        select str_to_map(null, key_delim_not_null, value_delim_not_null)
+        from str_to_map_args
+        order by k0;
+    """
+
+    order_qt_const_null_2 """
+        select str_to_map(map_str_not_null, null, value_delim_not_null)
+        from str_to_map_args
+        order by k0;
+    """
+
+    order_qt_const_null_3 """
+        select str_to_map(map_str_not_null, key_delim_not_null, null)
+        from str_to_map_args
+        order by k0;
+    """
+
+    // Constant-argument cases; most constant combinations are covered by BE unit tests.
+    // Test const string with column delimiters
+    order_qt_const_str """
+        select str_to_map('a:1,b:2', key_delim_not_null, value_delim_not_null) 
+        from str_to_map_args order by k0
+    """
+    
+    // Test column string with const delimiters  
+    order_qt_const_delims """
+        select str_to_map(map_str_not_null, ',', ':') 
+        from str_to_map_args order by k0
+    """
+        
+    // Test const string with one const delimiter and one column delimiter
+    order_qt_mixed_const1 """
+        select str_to_map('x=1;y=2', ';', value_delim_not_null) 
+        from str_to_map_args order by k0
+    """
+    
+    order_qt_mixed_const2 """
+        select str_to_map('p-1|q-2', key_delim_not_null, '-') 
+        from str_to_map_args order by k0
+    """
+    
+    // Test all const non-null arguments
+    order_qt_all_const """
+        select str_to_map('a=1|b=2', '|', '=') 
+        from str_to_map_args order by k0
+    """
+    
+    // Test const string with nullable column delimiters
+    order_qt_const_str_null_delims """
+        select str_to_map('m:1,n:2', key_delim_null, value_delim_null) 
+        from str_to_map_args order by k0
+    """
+    
+    // Test nullable column string with const delimiters
+    order_qt_null_str_const_delims '''
+        select str_to_map(map_str_null, '#', '$') 
+        from str_to_map_args order by k0
+    '''
+
+    // Test basic str_to_map functionality with all parameters
+    qt_basic_1 "select str_to_map('a:1,b:2,c:3', ',', ':');"
+    qt_basic_2 "select str_to_map('key1=val1;key2=val2', ';', '=');"
+    qt_basic_3 "select str_to_map('x-1|y-2|z-3', '|', '-');"
+
+    // Test with default parameters (omitting both delimiters)
+    // Default pair delimiter is ',' and key-value delimiter is ':'
+    qt_default_both_1 "select str_to_map('a:1,b:2,c:3');"
+    qt_default_both_2 "select str_to_map('key1:value1,key2:value2');"
+    qt_default_both_3 "select str_to_map('x:1,y:2,z:');"
+    qt_default_both_4 "select str_to_map('');"
+
+    // Test with default key-value delimiter (omitting last parameter)
+    // Default key-value delimiter is ':'
+    qt_default_value_1 "select str_to_map('a:1;b:2;c:3', ';');"
+    qt_default_value_2 "select str_to_map('key:val|foo:bar', '|');"
+    qt_default_value_3 "select str_to_map('x:1#y:2#z:3', '#');"
+    qt_default_value_4 "select str_to_map('a:1...b:2...c:3', '...');"
+
+    // Test empty string cases
+    qt_empty_1 "select str_to_map('');"
+    qt_empty_2 "select str_to_map('a:1,,b:2');" 
+    qt_empty_3 "select str_to_map('a:,b:2,c:');"
+    qt_empty_4 "select str_to_map(',,,');"
+
+    // Test missing key-value delimiter
+    qt_missing_value_1 "select str_to_map('a,b:2,c');"
+    qt_missing_value_2 "select str_to_map('val1,val2,val3');"
+    qt_missing_value_3 "select str_to_map('key1,key2:val2,key3');"
+
+    // Test with special characters
+    qt_special_1 "select str_to_map('\ta:1\n,\tb:2\n');"
+    qt_special_2 "select str_to_map('a\\nb:1,c\\td:2');"
+    qt_special_3 "select str_to_map('key1:value1,key2:value2', ',', ':');"
+
+    // Test with spaces
+    qt_spaces_1 "select str_to_map('a : 1, b : 2');"
+    qt_spaces_2 "select str_to_map(' a:1 , b:2 ');"
+    qt_spaces_3 "select str_to_map('   a:1,   b:2   ');"
+    qt_spaces_4 "select str_to_map(' ');"
+
+    // Test with Unicode characters
+    qt_unicode_1 "select str_to_map('键1:值1,键2:值2');"
+    qt_unicode_2 "select str_to_map('标题①:内容①,标题②:内容②');"
+    qt_unicode_3 "select str_to_map('🔑:🔒,📝:📖');"
+    qt_unicode_4 "select str_to_map('あ:い,う:え');"
+
+    // Test with duplicate keys
+    qt_dup_1 "select str_to_map('a:1,b:2,a:3');"
+    qt_dup_2 "select str_to_map('key:val1,key:val2,key:val3');"
+    qt_dup_3 "select str_to_map('a:1,a:,a:3');"
+
+    // Test edge cases
+    qt_edge_1 "select str_to_map('a:1:2,b:3:4');"
+    qt_edge_2 "select str_to_map(':::');"
+    qt_edge_3 "select str_to_map('a:1:2');"
+    qt_edge_4 "select str_to_map('key::value');"
+    qt_edge_5 "select str_to_map(':');"
+
+    // Test extremely long strings
+    qt_long_1 "select str_to_map(repeat('a:1,', 1000));"
+    qt_long_2 "select str_to_map(concat(repeat('key', 100), ':', 
repeat('value', 100)));"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to