This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new edee8c1b8d7 [fix](struct)Fixed the issue of inserting into a struct 
type string literal with one more subfield causing BE coredump (#49485)
edee8c1b8d7 is described below

commit edee8c1b8d7191b20f44e9a95dc80ede6bd6c2e0
Author: amory <wangqian...@selectdb.com>
AuthorDate: Wed Mar 26 22:44:09 2025 +0800

    [fix](struct)Fixed the issue of inserting into a struct type string literal 
with one more subfield causing BE coredump (#49485)
    
    Fixed the issue of inserting into a struct type string literal with one
    more subfield causing BE coredump
    some situation like this blow will make BE core
    ```
    create table t(a int, b int, s struct<a:int>) PROPERTIES 
("replication_allocation" = "tag.location.default: 1");
    insert into t values(1,1,'{1,2}');
    ```
    core info:
    ```
    [WARNING!] /sys/kernel/mm/transparent_hugepage/enabled: [always] madvise 
never, Doris not recommend turning on THP, which may cause the BE process to 
use more memory and cannot be freed in time. Turn off THP: `echo madvise | sudo 
tee /sys/kernel/mm/transparent_hugepage/enabled`
    start BE in local mode
    =================================================================
    ==2818976==ERROR: AddressSanitizer: heap-buffer-overflow on address 
0x603000f17a90 at pc 0x55eba7fdd69c bp 0x7f03243dbd30 sp 0x7f03243dbd28
    READ of size 8 at 0x603000f17a90 thread T928 (brpc_light)
        #0 0x55eba7fdd69b in 
std::__shared_ptr<doris::vectorized::DataTypeSerDe, 
(__gnu_cxx::_Lock_policy)2>::get() const 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:1291:16
        #1 0x55eba7fdd649 in 
std::__shared_ptr_access<doris::vectorized::DataTypeSerDe, 
(__gnu_cxx::_Lock_policy)2, false, false>::_M_get() const 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:990:66
        #2 0x55eba7fda316 in 
std::__shared_ptr_access<doris::vectorized::DataTypeSerDe, 
(__gnu_cxx::_Lock_policy)2, false, false>::operator->() const 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:984:9
        #3 0x55ebcfc8b19e in 
doris::vectorized::DataTypeStructSerDe::deserialize_one_cell_from_json(doris::vectorized::IColumn&,
 doris::Slice&, doris::vectorized::DataTypeSerDe::FormatOptions const&) const 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_struct_serde.cpp:200:25
        #4 0x55ebdac49c8b in 
doris::vectorized::ConvertImplGenericFromString::execute(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long) 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/functions/function_cast.h:618:36
        #5 0x55ebda2234d0 in doris::Status std::__invoke_impl<doris::Status, 
doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long), doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, 
unsigned long>(std::__invoke_other, doris::Status (*&)(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector<u [...]
        #6 0x55ebda22323a in std::enable_if<is_invocable_r_v<doris::Status, 
doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long), doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, 
unsigned long>, doris::Status>::type std::__invoke_r<doris::Status, 
doris::Status (*&)(doris::FunctionContext*, doris::ve [...]
        #7 0x55ebda222e71 in std::_Function_handler<doris::Status 
(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, 
std::allocator<unsigned int>> const&, unsigned long, unsigned long), 
doris::Status (*)(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long)>::_M_invoke(std::_Any_data const&, doris::FunctionContext*&&, 
doris::vectorized::Block&, std::vector<unsigned int, s [...]
        #8 0x55ebd9b3f1d2 in std::function<doris::Status 
(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, 
std::allocator<unsigned int>> const&, unsigned long, unsigned 
long)>::operator()(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, 
unsigned long) const 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function
 [...]
        #9 0x55ebda1ad7c7 in 
doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*,
 std::shared_ptr<doris::vectorized::IDataType const> const&, 
std::shared_ptr<doris::vectorized::IDataType const> const&, bool) 
const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long)::operator()(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std:: [...]
        #10 0x55ebda1acee2 in doris::Status std::__invoke_impl<doris::Status, 
doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*,
 std::shared_ptr<doris::vectorized::IDataType const> const&, 
std::shared_ptr<doris::vectorized::IDataType const> const&, bool) 
const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long)&, doris::FunctionContext*, doris::vectorized:: [...]
    ```
---
 .../data_types/serde/data_type_struct_serde.cpp    |   6 +-
 .../data/insert_p0/test_struct_insert.out          | Bin 625 -> 4698 bytes
 .../suites/insert_p0/test_struct_insert.groovy     | 101 ++++++++++++++++++++-
 3 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp 
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index ead4d0b2088..ee59138a372 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -158,7 +158,8 @@ Status 
DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic
             }
             Slice next(slice.data + start_pos, idx - start_pos);
             next.trim_prefix();
-            if (field_pos > elem_size) {
+            // field_pos should always less than elem_size, if not, we should 
return error
+            if (field_pos >= elem_size) {
                 // we should do column revert if error
                 for (size_t j = 0; j < field_pos; j++) {
                     struct_column.get_column(j).pop_back(1);
@@ -188,7 +189,8 @@ Status 
DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic
         (key_added || !is_explicit_names)) {
         Slice next(slice.data + start_pos, idx - start_pos);
         next.trim_prefix();
-        if (field_pos > elem_size) {
+        /// field_pos should always less than elem_size, if not, we should 
return error
+        if (field_pos >= elem_size) {
             // we should do column revert if error
             for (size_t j = 0; j < field_pos; j++) {
                 struct_column.get_column(j).pop_back(1);
diff --git a/regression-test/data/insert_p0/test_struct_insert.out 
b/regression-test/data/insert_p0/test_struct_insert.out
index 3dc160ef7f0..91230f415e2 100644
Binary files a/regression-test/data/insert_p0/test_struct_insert.out and 
b/regression-test/data/insert_p0/test_struct_insert.out differ
diff --git a/regression-test/suites/insert_p0/test_struct_insert.groovy 
b/regression-test/suites/insert_p0/test_struct_insert.groovy
index a845c978580..e2ef485fbcb 100644
--- a/regression-test/suites/insert_p0/test_struct_insert.groovy
+++ b/regression-test/suites/insert_p0/test_struct_insert.groovy
@@ -73,4 +73,103 @@ suite("test_struct_insert") {
 
     // select the table and check whether the data is correct
     qt_select "select * from ${testTable} order by k1"
-}
\ No newline at end of file
+
+  sql "DROP TABLE IF EXISTS test_struct_insert_into"
+    sql """
+    CREATE TABLE IF NOT EXISTS test_struct_insert_into (
+        id INT,
+        s STRUCT<a:INT>,
+        s1 STRUCT<a:INT, b:VARCHAR(20)>,
+        s2 struct<a:int, s:struct<a:int>>,
+        s3 struct<a:int, s:struct<a:int, b:varchar(20)>>,
+        s4 STRUCT<a:INT, s:STRUCT<b:INT, s:STRUCT<c:INT>>>,
+        s5 STRUCT<a:INT, b:STRUCT<c:INT, d:VARCHAR(10)>, e:INT>
+    ) PROPERTIES ("replication_allocation" = "tag.location.default: 1");
+    """
+
+    // insert into table
+    // right cases
+    sql "INSERT INTO test_struct_insert_into VALUES(1, {1}, {1, 'a'}, {1, 
{1}}, {1, {1, 'a'}}, {1, {1, {1}}}, {1, {1, 'a'}, 1})"
+
+    qt_select "select * from test_struct_insert_into order by id"
+
+    sql """INSERT INTO test_struct_insert_into VALUES (
+             2,
+             '{10}',                      -- s: right
+             '{20, "valid"}',             -- s1: right
+             '{30, {31}}',                -- s2: right(outer a=30,s.a=31)
+             '{40, {41, "nested"}}',      -- s3: right(a=40,s.a=41, 
s.b="nested")
+             '{50, {51, {52}}}',          -- s4: right(a=50 -> s.b=51 -> 
s.s.c=52)
+             '{60, {61, "text"}, 70}'     -- s5: right(a=60, b.c=61, 
b.d="text", e=70)
+         );"""
+
+    qt_select "select * from test_struct_insert_into order by id"
+
+    sql """
+    INSERT INTO test_struct_insert_into VALUES (
+         3,
+        '{10, 20}',       -- s:  more -> NULL
+        '{30, "valid"}',  -- s1: right
+        NULL,             -- s2: NULL
+        '{40, {41, "valid"}}',     -- s3: right
+        '{50, {51, null}}',     -- s4: s.s is null
+        '{60, NULL, 70}'  -- s5: b is NULL
+    );
+    """
+
+    qt_select "select * from test_struct_insert_into order by id"
+
+   sql """
+    INSERT INTO test_struct_insert_into VALUES (
+       4,
+       '{"invalid"}',      -- s.a type invalid cast -> s.a is NULL
+       '{40, 50}',         -- right
+       '{50, {"invalid"}}',-- s2.s.a type invalid cast -> s2.s is {"a":null}
+       '{60, {70, 80}}',   -- right
+       '{90, {"invalid", {100}}}',  -- s4.s.b type invalid cast -> s4.s is NULL
+       '{100, {110, 120}, "invalid"}'  -- s5.e type invalid cast -> s5.e is 
NULL
+   );
+   """
+   qt_select "select * from test_struct_insert_into order by id"
+
+   sql """
+   INSERT INTO test_struct_insert_into VALUES (
+       5,
+       '{10}',
+       '{20, "valid"}',
+       '{30, {31, 32}}',       -- s2.s more -> s2.s is NULL
+       '{40, {41, "nested", 42}}',  -- s3.s more -> s3.s is NULL
+       '{50, {51, {52, 53}}}',      -- s4.s.s more -> s4.s.s is NULL
+       '{60, {61, "text", 62}, 70}' -- s5.b more -> s5.b is NULL
+   );
+   """
+  qt_select "select * from test_struct_insert_into order by id"
+  test {
+    sql """
+    INSERT INTO test_struct_insert_into VALUES (
+        6,
+        '{10}',
+        '{20}',                -- s1 less  -> s1 is NULL
+        '{30, {31}}',          -- s2 right
+        '{40, {41}}',          -- s3.s less  -> s3.s is NULL
+        '{50, {51}}',          -- s4.s less  -> s4.s is NULL
+        '{60, {61}, 70}'       -- s5.b less  -> s5.b is NULL
+    );
+    """
+    exception("Size of offsets doesn't match size of column")
+    }
+    qt_select "select * from test_struct_insert_into order by id"
+
+    sql """
+    INSERT INTO test_struct_insert_into VALUES (
+        7,
+        '{10}',                  -- right
+        '{20, }',                -- s1.b is empty
+        '{30, }',                -- s2.s is empty
+        '{40, {41, "nested"}}',   -- right
+        '{50, {51, {52}}}',       -- right  
+        '{60, {61, }, 70}'      -- s5.b.d is empty
+    );
+    """
+     qt_select "select * from test_struct_insert_into order by id"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to