This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new edee8c1b8d7 [fix](struct)Fixed the issue of inserting into a struct type string literal with one more subfield causing BE coredump (#49485) edee8c1b8d7 is described below commit edee8c1b8d7191b20f44e9a95dc80ede6bd6c2e0 Author: amory <wangqian...@selectdb.com> AuthorDate: Wed Mar 26 22:44:09 2025 +0800 [fix](struct)Fixed the issue of inserting into a struct type string literal with one more subfield causing BE coredump (#49485) Fixed the issue of inserting into a struct type string literal with one more subfield causing BE coredump some situation like this blow will make BE core ``` create table t(a int, b int, s struct<a:int>) PROPERTIES ("replication_allocation" = "tag.location.default: 1"); insert into t values(1,1,'{1,2}'); ``` core info: ``` [WARNING!] /sys/kernel/mm/transparent_hugepage/enabled: [always] madvise never, Doris not recommend turning on THP, which may cause the BE process to use more memory and cannot be freed in time. Turn off THP: `echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled` start BE in local mode ================================================================= ==2818976==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x603000f17a90 at pc 0x55eba7fdd69c bp 0x7f03243dbd30 sp 0x7f03243dbd28 READ of size 8 at 0x603000f17a90 thread T928 (brpc_light) #0 0x55eba7fdd69b in std::__shared_ptr<doris::vectorized::DataTypeSerDe, (__gnu_cxx::_Lock_policy)2>::get() const /mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:1291:16 #1 0x55eba7fdd649 in std::__shared_ptr_access<doris::vectorized::DataTypeSerDe, (__gnu_cxx::_Lock_policy)2, false, false>::_M_get() const /mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:990:66 #2 0x55eba7fda316 in std::__shared_ptr_access<doris::vectorized::DataTypeSerDe, (__gnu_cxx::_Lock_policy)2, false, false>::operator->() const /mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:984:9 #3 0x55ebcfc8b19e in doris::vectorized::DataTypeStructSerDe::deserialize_one_cell_from_json(doris::vectorized::IColumn&, doris::Slice&, doris::vectorized::DataTypeSerDe::FormatOptions const&) const /mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_struct_serde.cpp:200:25 #4 0x55ebdac49c8b in doris::vectorized::ConvertImplGenericFromString::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long) /mnt/disk1/wangqiannan/amory/doris/be/src/vec/functions/function_cast.h:618:36 #5 0x55ebda2234d0 in doris::Status std::__invoke_impl<doris::Status, doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long), doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, unsigned long>(std::__invoke_other, doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&, std::vector<u [...] #6 0x55ebda22323a in std::enable_if<is_invocable_r_v<doris::Status, doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long), doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, unsigned long>, doris::Status>::type std::__invoke_r<doris::Status, doris::Status (*&)(doris::FunctionContext*, doris::ve [...] #7 0x55ebda222e71 in std::_Function_handler<doris::Status (doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, unsigned long), doris::Status (*)(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long)>::_M_invoke(std::_Any_data const&, doris::FunctionContext*&&, doris::vectorized::Block&, std::vector<unsigned int, s [...] #8 0x55ebd9b3f1d2 in std::function<doris::Status (doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, unsigned long)>::operator()(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, unsigned long) const /mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function [...] #9 0x55ebda1ad7c7 in doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*, std::shared_ptr<doris::vectorized::IDataType const> const&, std::shared_ptr<doris::vectorized::IDataType const> const&, bool) const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long)::operator()(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std:: [...] #10 0x55ebda1acee2 in doris::Status std::__invoke_impl<doris::Status, doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*, std::shared_ptr<doris::vectorized::IDataType const> const&, std::shared_ptr<doris::vectorized::IDataType const> const&, bool) const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, unsigned long)&, doris::FunctionContext*, doris::vectorized:: [...] ``` --- .../data_types/serde/data_type_struct_serde.cpp | 6 +- .../data/insert_p0/test_struct_insert.out | Bin 625 -> 4698 bytes .../suites/insert_p0/test_struct_insert.groovy | 101 ++++++++++++++++++++- 3 files changed, 104 insertions(+), 3 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index ead4d0b2088..ee59138a372 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -158,7 +158,8 @@ Status DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic } Slice next(slice.data + start_pos, idx - start_pos); next.trim_prefix(); - if (field_pos > elem_size) { + // field_pos should always less than elem_size, if not, we should return error + if (field_pos >= elem_size) { // we should do column revert if error for (size_t j = 0; j < field_pos; j++) { struct_column.get_column(j).pop_back(1); @@ -188,7 +189,8 @@ Status DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic (key_added || !is_explicit_names)) { Slice next(slice.data + start_pos, idx - start_pos); next.trim_prefix(); - if (field_pos > elem_size) { + /// field_pos should always less than elem_size, if not, we should return error + if (field_pos >= elem_size) { // we should do column revert if error for (size_t j = 0; j < field_pos; j++) { struct_column.get_column(j).pop_back(1); diff --git a/regression-test/data/insert_p0/test_struct_insert.out b/regression-test/data/insert_p0/test_struct_insert.out index 3dc160ef7f0..91230f415e2 100644 Binary files a/regression-test/data/insert_p0/test_struct_insert.out and b/regression-test/data/insert_p0/test_struct_insert.out differ diff --git a/regression-test/suites/insert_p0/test_struct_insert.groovy b/regression-test/suites/insert_p0/test_struct_insert.groovy index a845c978580..e2ef485fbcb 100644 --- a/regression-test/suites/insert_p0/test_struct_insert.groovy +++ b/regression-test/suites/insert_p0/test_struct_insert.groovy @@ -73,4 +73,103 @@ suite("test_struct_insert") { // select the table and check whether the data is correct qt_select "select * from ${testTable} order by k1" -} \ No newline at end of file + + sql "DROP TABLE IF EXISTS test_struct_insert_into" + sql """ + CREATE TABLE IF NOT EXISTS test_struct_insert_into ( + id INT, + s STRUCT<a:INT>, + s1 STRUCT<a:INT, b:VARCHAR(20)>, + s2 struct<a:int, s:struct<a:int>>, + s3 struct<a:int, s:struct<a:int, b:varchar(20)>>, + s4 STRUCT<a:INT, s:STRUCT<b:INT, s:STRUCT<c:INT>>>, + s5 STRUCT<a:INT, b:STRUCT<c:INT, d:VARCHAR(10)>, e:INT> + ) PROPERTIES ("replication_allocation" = "tag.location.default: 1"); + """ + + // insert into table + // right cases + sql "INSERT INTO test_struct_insert_into VALUES(1, {1}, {1, 'a'}, {1, {1}}, {1, {1, 'a'}}, {1, {1, {1}}}, {1, {1, 'a'}, 1})" + + qt_select "select * from test_struct_insert_into order by id" + + sql """INSERT INTO test_struct_insert_into VALUES ( + 2, + '{10}', -- s: right + '{20, "valid"}', -- s1: right + '{30, {31}}', -- s2: right(outer a=30,s.a=31) + '{40, {41, "nested"}}', -- s3: right(a=40,s.a=41, s.b="nested") + '{50, {51, {52}}}', -- s4: right(a=50 -> s.b=51 -> s.s.c=52) + '{60, {61, "text"}, 70}' -- s5: right(a=60, b.c=61, b.d="text", e=70) + );""" + + qt_select "select * from test_struct_insert_into order by id" + + sql """ + INSERT INTO test_struct_insert_into VALUES ( + 3, + '{10, 20}', -- s: more -> NULL + '{30, "valid"}', -- s1: right + NULL, -- s2: NULL + '{40, {41, "valid"}}', -- s3: right + '{50, {51, null}}', -- s4: s.s is null + '{60, NULL, 70}' -- s5: b is NULL + ); + """ + + qt_select "select * from test_struct_insert_into order by id" + + sql """ + INSERT INTO test_struct_insert_into VALUES ( + 4, + '{"invalid"}', -- s.a type invalid cast -> s.a is NULL + '{40, 50}', -- right + '{50, {"invalid"}}',-- s2.s.a type invalid cast -> s2.s is {"a":null} + '{60, {70, 80}}', -- right + '{90, {"invalid", {100}}}', -- s4.s.b type invalid cast -> s4.s is NULL + '{100, {110, 120}, "invalid"}' -- s5.e type invalid cast -> s5.e is NULL + ); + """ + qt_select "select * from test_struct_insert_into order by id" + + sql """ + INSERT INTO test_struct_insert_into VALUES ( + 5, + '{10}', + '{20, "valid"}', + '{30, {31, 32}}', -- s2.s more -> s2.s is NULL + '{40, {41, "nested", 42}}', -- s3.s more -> s3.s is NULL + '{50, {51, {52, 53}}}', -- s4.s.s more -> s4.s.s is NULL + '{60, {61, "text", 62}, 70}' -- s5.b more -> s5.b is NULL + ); + """ + qt_select "select * from test_struct_insert_into order by id" + test { + sql """ + INSERT INTO test_struct_insert_into VALUES ( + 6, + '{10}', + '{20}', -- s1 less -> s1 is NULL + '{30, {31}}', -- s2 right + '{40, {41}}', -- s3.s less -> s3.s is NULL + '{50, {51}}', -- s4.s less -> s4.s is NULL + '{60, {61}, 70}' -- s5.b less -> s5.b is NULL + ); + """ + exception("Size of offsets doesn't match size of column") + } + qt_select "select * from test_struct_insert_into order by id" + + sql """ + INSERT INTO test_struct_insert_into VALUES ( + 7, + '{10}', -- right + '{20, }', -- s1.b is empty + '{30, }', -- s2.s is empty + '{40, {41, "nested"}}', -- right + '{50, {51, {52}}}', -- right + '{60, {61, }, 70}' -- s5.b.d is empty + ); + """ + qt_select "select * from test_struct_insert_into order by id" +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org