This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 16e348b189e [fix](array/map) Fix BE crash in lambda functions (#49139)
16e348b189e is described below

commit 16e348b189e634a8265c4fe62c45915b7d70a929
Author: Gabriel <liwenqi...@selectdb.com>
AuthorDate: Tue Mar 18 11:51:38 2025 +0800

    [fix](array/map) Fix BE crash in lambda functions (#49139)
---
 .../exprs/lambda_function/varray_map_function.cpp  |   9 +-
 .../vec/functions/array/function_array_element.h   |   5 +-
 .../data/function_p0/test_array_map.out            | Bin 0 -> 107 bytes
 .../suites/function_p0/test_array_map.groovy       | 227 +++++++++++++++++++++
 4 files changed, 236 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/exprs/lambda_function/varray_map_function.cpp b/be/src/vec/exprs/lambda_function/varray_map_function.cpp
index f8d0479c53d..78b7c6cf68c 100644
--- a/be/src/vec/exprs/lambda_function/varray_map_function.cpp
+++ b/be/src/vec/exprs/lambda_function/varray_map_function.cpp
@@ -140,8 +140,8 @@ public:
             auto type_array = array_column_type_name.type;
             if (type_array->is_nullable()) {
                 // get the nullmap of nullable column
-                const auto& column_array_nullmap =
-                        assert_cast<const ColumnNullable&>(*column_array).get_null_map_column();
+                auto column_array_nullmap =
+                        assert_cast<const ColumnNullable&>(*column_array).get_null_map_column_ptr();
 
                 // get the array column from nullable column
                 column_array = assert_cast<const ColumnNullable*>(column_array.get())
@@ -152,8 +152,9 @@ public:
                                      ->get_nested_type();
 
                 // need to union nullmap from all columns
-                VectorizedUtils::update_null_map(outside_null_map->get_data(),
-                                                 column_array_nullmap.get_data());
+                VectorizedUtils::update_null_map(
+                        outside_null_map->get_data(),
+                        assert_cast<const ColumnUInt8&>(*column_array_nullmap).get_data());
             }
 
             // here is the array column
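
The hunk above stops binding a plain reference to the result of get_null_map_column() and instead holds the ColumnPtr returned by get_null_map_column_ptr(), casting it to ColumnUInt8 only at the point of use, after column_array has been reassigned to its nested column. As a hedged illustration of that ownership pattern, here is a minimal stand-alone C++ sketch; NullMap, NullableColumn and the field names below are simplified stand-ins, not the Doris column classes:

    #include <cassert>
    #include <memory>
    #include <vector>

    // Simplified stand-ins for a nullable column and its null-map sub-column.
    struct NullMap {
        std::vector<unsigned char> data{0, 1, 0};
    };
    struct NullableColumn {
        std::shared_ptr<NullMap> null_map = std::make_shared<NullMap>();
        std::shared_ptr<std::vector<int>> nested =
                std::make_shared<std::vector<int>>(std::vector<int>{1, 2, 3});
    };

    int main() {
        auto column = std::make_shared<NullableColumn>();

        // Risky: a reference into *column is only valid while the
        // NullableColumn object itself stays alive.
        // const auto& null_map_ref = *column->null_map;

        // Safer: copy the shared_ptr, so the null map is kept alive on its
        // own, independent of what happens to `column` afterwards.
        std::shared_ptr<NullMap> null_map_ptr = column->null_map;

        // The owning pointer is redirected to the nested column, dropping the
        // last reference to the original NullableColumn.
        std::shared_ptr<std::vector<int>> nested = column->nested;
        column.reset();

        assert(null_map_ptr->data.size() == 3);  // still valid via shared ownership
        (void)nested;
        return 0;
    }

Holding the shared pointer keeps the null map alive however the owning column pointer is rewritten afterwards, which appears to be the property the switch to get_null_map_column_ptr() relies on.
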
diff --git a/be/src/vec/functions/array/function_array_element.h b/be/src/vec/functions/array/function_array_element.h
index eae1f1294c5..2d4c2e1c917 100644
--- a/be/src/vec/functions/array/function_array_element.h
+++ b/be/src/vec/functions/array/function_array_element.h
@@ -100,6 +100,9 @@ public:
         UInt8* dst_null_map = dst_null_column->get_data().data();
         const UInt8* src_null_map = nullptr;
         ColumnsWithTypeAndName args;
+        block.replace_by_position(
+                arguments[0],
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const());
         auto col_left = block.get_by_position(arguments[0]);
         if (col_left.column->is_nullable()) {
             auto null_col = check_and_get_column<ColumnNullable>(*col_left.column);
@@ -327,7 +330,7 @@ private:
                                 const UInt8* src_null_map, UInt8* dst_null_map) const {
         // check array nested column type and get data
         auto left_column = arguments[0].column->convert_to_full_column_if_const();
-        const auto& array_column = reinterpret_cast<const ColumnArray&>(*left_column);
+        const auto& array_column = assert_cast<const ColumnArray&>(*left_column);
         const auto& offsets = array_column.get_offsets();
         DCHECK(offsets.size() == input_rows_count);
         const UInt8* nested_null_map = nullptr;
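
The hunks above make two changes: the first argument column is expanded with convert_to_full_column_if_const() before it is read row by row, and the downcast to ColumnArray now goes through assert_cast instead of reinterpret_cast. To illustrate the checked-downcast part, here is a small self-contained C++ sketch; checked_cast and the column structs are hypothetical stand-ins, not the Doris assert_cast implementation:

    #include <cassert>
    #include <type_traits>

    // Simplified stand-ins; not the Doris column hierarchy or assert_cast.
    struct IColumn {
        virtual ~IColumn() = default;
    };
    struct ColumnArray : IColumn {};
    struct ColumnString : IColumn {};

    // Checked downcast: verify the dynamic type in debug builds, then perform
    // a plain static_cast. A reinterpret_cast would skip the check and
    // silently misread memory when the column is not actually a ColumnArray.
    template <typename To, typename From>
    To checked_cast(From& from) {
        assert(dynamic_cast<std::remove_reference_t<To>*>(&from) != nullptr);
        return static_cast<To>(from);
    }

    int main() {
        ColumnArray array_col;
        IColumn& as_base = array_col;
        const auto& ok = checked_cast<const ColumnArray&>(as_base);  // passes
        (void)ok;

        ColumnString string_col;
        IColumn& wrong = string_col;
        // checked_cast<const ColumnArray&>(wrong);  // trips the assert in a
        //                                           // debug build instead of
        //                                           // reinterpreting memory
        (void)wrong;
        return 0;
    }

With a checked cast, a column of an unexpected type (for example one that was never expanded out of its constant wrapper) fails the debug assertion instead of being reinterpreted as the wrong memory layout.
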
diff --git a/regression-test/data/function_p0/test_array_map.out b/regression-test/data/function_p0/test_array_map.out
new file mode 100644
index 00000000000..9c9c4c6c8a2
Binary files /dev/null and b/regression-test/data/function_p0/test_array_map.out differ
diff --git a/regression-test/suites/function_p0/test_array_map.groovy b/regression-test/suites/function_p0/test_array_map.groovy
new file mode 100644
index 00000000000..b93b130a329
--- /dev/null
+++ b/regression-test/suites/function_p0/test_array_map.groovy
@@ -0,0 +1,227 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_map") {
+    sql """
+        drop table if exists mock_table;
+    """
+
+    sql """
+        CREATE ALIAS FUNCTION clean_html_entity_test(string) WITH PARAMETER(html) AS
+        REPLACE(
+        REPLACE(
+        REPLACE(
+        REPLACE(
+        REPLACE(
+                REPLACE(
+                    REPLACE(
+                        REPLACE(
+                            REPLACE(
+                                REPLACE(
+                                    REPLACE(html, '&amp;', '&'),
+                                    '&lt;', '<'
+                                ),
+                                '&gt;', '>'
+                            ),
+                            '&quot;', '"'
+                        ),
+                        '&apos;', '\\\''
+                    ),'&euro;', '€'
+                ),
+                '&nbsp;', ' '
+            ), "Ⅰ", "I"), "Ⅱ", "II"), "Ⅲ", "III"),".", ". ");
+    """
+    sql """ CREATE ALIAS FUNCTION clean_html_tag_test(string) WITH  
PARAMETER(html) AS REGEXP_REPLACE(html, '</?[^>]+>', ''); """
+    sql """
+    CREATE TABLE `mock_table` (
+          `aa` varchar(255) NULL,
+          `ab` varchar(255) NULL,
+          `ac` varchar(255) NULL,
+          `ad` text NULL,
+          `ae` text NULL,
+          `af` text NULL,
+          `ag` text NULL,
+          `ah` text NULL,
+          `ai` text NULL,
+          `aj` varchar(255) NULL,
+          `ak` text NULL,
+          `al` text NULL,
+          `am` text NULL,
+          `an` text NULL,
+          `ao` text NULL,
+          `ap` text NULL,
+          `aq` text NULL,
+          `ar` text NULL,
+          `as` text NULL,
+          `at` text NULL,
+          `au` text NULL,
+          `av` bigint NULL,
+          `aw` text NULL,
+          `ax` varchar(255) NULL,
+          `ay` text NULL,
+          `az` varchar(255) NULL,
+          `ba` varchar(255) NULL,
+          `bb` varchar(255) NULL,
+          `bc` int NULL,
+          `bd` int NULL,
+          `be` varchar(255) NULL,
+          `bf` varchar(255) NULL,
+          `bg` array<varchar(255)> NULL,
+          `bh` json NULL,
+          `bi` varchar(255) NULL,
+          `bj` varchar(255) NULL,
+          `bk` array<varchar(255)> NULL,
+          `bl` boolean NULL,
+          INDEX idx_ag (`ag`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+          INDEX idx_ad (`ad`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+          INDEX idx_ah (`ah`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+          INDEX idx_ac (`ac`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+          INDEX idx_ak (`ak`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+          INDEX idx_al (`al`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+          INDEX idx_am (`am`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+          INDEX idx_ag_ngrambf (`ag`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
+          INDEX idx_ad_ngrambf (`ad`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
+          INDEX idx_ac_ngrambf (`ac`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
+          INDEX idx_ah_ngrambf (`ah`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
+          INDEX idx_bi (`bi`) USING INVERTED,
+          INDEX idx_ar (`ar`) USING INVERTED PROPERTIES("support_phrase" = "true", "parser" = "unicode", "lower_case" = "true"),
+          INDEX idx_ar_ngrambf (`ar`) USING NGRAM_BF PROPERTIES("bf_size" = "256", "gram_size" = "2"),
+          INDEX idx_bl (`bl`) USING INVERTED
+        ) ENGINE=OLAP
+        UNIQUE KEY(`aa`)
+        DISTRIBUTED BY HASH(`aa`) BUCKETS 16
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1",
+        "min_load_replica_num" = "-1",
+        "is_being_synced" = "false",
+        "storage_medium" = "hdd",
+        "storage_format" = "V2",
+        "inverted_index_storage_format" = "V1",
+        "enable_unique_key_merge_on_write" = "true",
+        "light_schema_change" = "true",
+        "disable_auto_compaction" = "false",
+        "enable_single_replica_compaction" = "false",
+        "group_commit_interval_ms" = "10000",
+        "group_commit_data_bytes" = "134217728",
+        "enable_mow_light_delete" = "false"
+        );
+    """
+    sql """
+     CREATE VIEW `mock_view` AS
+         WITH
+             bm AS (SELECT
+                 `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`,
+                 CASE WHEN YEAR(`as`) >= 1970 THEN `as` ELSE NULL END as `bn`,
+                 CASE WHEN YEAR(`au`) >= 1970 THEN `au` ELSE NULL END as `bo`,
+                 CASE WHEN YEAR(`at`) >= 1970 THEN `at` ELSE NULL END as `bp`,
+                 LENGTH(`aw`) as `bq`,
+                 TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ah`))) as `br`,
+                 TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ad`))) as `bs`,
+                 ARRAY_MAP(x-> if(least((left(x, 5) = '6841-'), (length(x) = 10)), concat_ws('-', substring(x, 1, 7), substring(x, 8)), if(least((left(x, 5) = '6841-'), (length(x) = 9)), concat_ws('-', substring(x, 1, 6), substring(x, 7)), x)), `bk`) as `bt`,
+                 ARRAY_JOIN(TOKENIZE(TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ad`))),'"parser"="unicode", "lower_case"="false", "stopwords"="none"'), " ") as `bu`,
+                 ARRAY_JOIN(TOKENIZE(TRIM(`clean_html_entity_test`(`clean_html_tag_test`(`ah`))),'"parser"="unicode", "lower_case"="false", "stopwords"="none"'), " ") as `bv`
+             FROM mock_table),
+             bw AS (SELECT
+                 `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`, `bn`, `bo`, `bp`, `bq`, `br`, `bs`, `bt`, `bu`, `bv`,
+                 CASE
+                     WHEN LENGTH(`bn`) = 10 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd')
+                     WHEN LENGTH(`bn`) = 19 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd HH:mm:ss')
+                     WHEN LENGTH(`bn`) = 26 THEN STR_TO_DATE(`bn`, 'yyyy-MM-dd HH:mm:ss.SSSSSS')
+                     ELSE NULL
+                 END AS `bx`,
+                 CASE
+                     WHEN LENGTH(`bo`) = 10 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd')
+                     WHEN LENGTH(`bo`) = 19 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd HH:mm:ss')
+                     WHEN LENGTH(`bo`) = 26 THEN STR_TO_DATE(`bo`, 'yyyy-MM-dd HH:mm:ss.SSSSSS')
+                     ELSE NULL
+                 END AS `by`,
+                 CASE
+                     WHEN LENGTH(`bp`) = 10 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd')
+                     WHEN LENGTH(`bp`) = 19 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd HH:mm:ss')
+                     WHEN LENGTH(`bp`) = 26 THEN STR_TO_DATE(`bp`, 'yyyy-MM-dd HH:mm:ss.SSSSSS')
+                     ELSE NULL
+                 END AS `bz`,
+                 ARRAY_REMOVE(
+                 ARRAY_COMPACT(
+                 ARRAY_UNION(
+                     ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -1), '-'), `bt`),
+                     ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -2), '-'), `bt`),
+                     ARRAY_MAP(x-> ARRAY_JOIN(ARRAY_SLICE(split_by_string(x, '-'), 1, size(split_by_string(x, '-')) -3), '-'), `bt`))), '') as `ca`,
+                 SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ') as `cb`,
+                 SPLIT_BY_STRING(`bu`, ' ') as `cc`,
+                 array_first_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ')) as `cd`,
+                 array_last_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bu`, '*', '*', '*'), ' ')) as `ce`,
+                 SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ') as `cf`,
+                 SPLIT_BY_STRING(`bv`, ' ') as `cg`,
+                 array_first_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ')) as `ch`,
+                 array_last_index(x-> locate('*', x ) = 0, SPLIT_BY_STRING(MASK(`bv`, '*', '*', '*'), ' ')) as `ci`
+             FROM bm),
+             cj AS (SELECT
+                 `aa`, `ab`, `ac`, `ad`, `ae`, `af`, `ag`, `ah`, `ai`, `aj`, `ak`, `al`, `am`, `an`, `ao`, `ap`, `aq`, `ar`, `as`, `at`, `au`, `av`, `aw`, `ax`, `ay`, `az`, `ba`, `bb`, `bc`, `bd`, `be`, `bf`, `bg`, `bh`, `bi`, `bj`, `bk`, `bl`, `bn`, `bo`, `bp`, `bq`, `br`, `bs`, `bt`, `bu`, `bv`, `bx`, `by`, `bz`, `ca`, `cb`, `cc`, `cd`, `ce`, `cf`, `cg`, `ch`, `ci`,
+                 ARRAY_COMPACT(ARRAY_EXCEPT(`bt`, `ca`)) as `ck`,
+                 ARRAY_COMPACT(ARRAY_UNION(`bt`, `ca`)) as `cl`,
+                 CASE
+                     WHEN SIZE(`cc`) = 0 THEN `bs`
+                     WHEN `cd`=1 AND `ce` < size(`cb`) and `ce` - `cd` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, 1, `ce`), " ")
+                     WHEN `cd`=2 AND `ce` < size(`cb`) and `ce` - `cd` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, 1, `ce`), " ")
+                     WHEN `cd` >2 AND `ce` = size(`cb`) and `ce` - `cd` > 1 THEN
+                     CASE
+                         WHEN element_at(`cc`, 1) = element_at(`cc`, `cd`-1) THEN ARRAY_JOIN(ARRAY_SLICE(`cc`, `cd`-1), "")
+                         ELSE ARRAY_JOIN(ARRAY_SLICE(`cc`, `cd`), " ")
+                     END
+                     ELSE ARRAY_JOIN(`cc`, " ")
+                 END AS `cm`,
+                 CASE
+                     WHEN size(`cc`) = 0 THEN "tokenize_failed"
+                     WHEN `cd` = 0 THEN "en"
+                     WHEN `cd`=1 AND `ce` = size(`cb`) THEN "zh"
+                     WHEN `cd`=1 AND `ce` < size(`cb`) THEN "zh_en"
+                     WHEN `cd`=2 AND `ce` < size(`cb`) THEN "zh_en"
+                     WHEN `cd` >2 AND `ce` = size(`cb`) THEN "en_zh"
+                     ELSE "mixed"
+                 END AS `cn`,
+                 CASE
+                     WHEN SIZE(`cg`) = 0 THEN `br`
+                     WHEN `ch`=1 AND `ci` < size(`cf`) and `ci` - `ch` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, 1, `ci`), " ")
+                     WHEN `ch`=2 AND `ci` < size(`cf`) and `ci` - `ch` > 1 THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, 1, `ci`), " ")
+                     WHEN `ch` >2 AND `ci` = size(`cf`) and `ci` - `ch` > 1 THEN
+                     CASE
+                         WHEN element_at(`cg`, 1) = element_at(`cg`, `ch`-1) THEN ARRAY_JOIN(ARRAY_SLICE(`cg`, `ch`-1), "")
+                         ELSE ARRAY_JOIN(ARRAY_SLICE(`cg`, `ch`), " ")
+                     END
+                     ELSE ARRAY_JOIN(`cg`, " ")
+                 END AS `co`,
+                 CASE
+                     WHEN size(`cg`) = 0 THEN "tokenize_failed"
+                     WHEN `ch` = 0 THEN "en"
+                     WHEN `ch`=1 AND `ci` = size(`cf`) THEN "zh"
+                     WHEN `ch`=1 AND `ci` < size(`cf`) THEN "zh_en"
+                     WHEN `ch`=2 AND `ci` < size(`cf`) THEN "zh_en"
+                     WHEN `ch` >2 AND `ci` = size(`cf`) THEN "en_zh"
+                     ELSE "mixed"
+                 END AS `cp`
+             FROM bw)
+         SELECT * FROM cj;
+     """
+    sql """
+        insert into mock_table(aa, ab,ac,ad) values('1','2','3','4');
+    """
+
+    qt_sql """ 
+        SELECT * FROM mock_view LIMIT 530000,1000;
+    """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org
