This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new aee98d89b4f [fix](schema-change) fix array/map/struct in schema-change 
not-null to null will make core (#45305)
aee98d89b4f is described below

commit aee98d89b4f1cd91d3b8223ab142ce7004ac8f41
Author: amory <wangqian...@selectdb.com>
AuthorDate: Mon Dec 16 18:12:35 2024 +0800

    [fix](schema-change) fix array/map/struct in schema-change not-null to null 
will make core (#45305)
    
    if we create a table with an array/map/struct column defined as not null
    and then alter the column to nullable, the alter operation itself succeeds,
    but when we select from the table, the BE may crash with the core dump below
    ```
    mysql> create table etest ( `id` VARCHAR(50) NOT NULL COMMENT '主键id',  
`second_categories` ARRAY < VARCHAR(255)> NOT NULL COMMENT '二级分类') ENGINE = 
OLAP UNIQUE KEY(`id`) DISTRIBUTED BY HASH(`id`) BUCKETS 1  PROPERTIES (  
'replication_allocation' = 'tag.location.default: 1',  
'enable_unique_key_merge_on_write' = 'true',  'light_schema_change' = 'true', 
'disable_auto_compaction'='true');
    Query OK, 0 rows affected (0.12 sec)
    
    mysql> insert into etest values (3, ['aaa', 'bbb', 'ccc']);
    Query OK, 1 row affected (0.22 sec)
    {'label':'label_e30fad022ff5423f_9c74c9fc9eb886a7', 'status':'VISIBLE', 
'txnId':'1025'}
    
    mysql> alter table etest  modify column `second_categories` 
array<varchar(255)> NULL COMMENT '二级分类';
    Query OK, 0 rows affected (0.10 sec)
    
    mysql> show alter table column;
    
+-------+-----------+-------------------------+-------------------------+-----------+---------+---------------+---------------+---------------+----------+------+----------+---------+
    | JobId | TableName | CreateTime              | FinishTime              | 
IndexName | IndexId | OriginIndexId | SchemaVersion | TransactionId | State    
| Msg  | Progress | Timeout |
    
+-------+-----------+-------------------------+-------------------------+-----------+---------+---------------+---------------+---------------+----------+------+----------+---------+
    | 11109 | mtest     | 2024-12-10 20:46:04.491 | 2024-12-10 20:46:05.731 | 
mtest     | 11110   | 11093         | 1:1857912462  | 1007          | FINISHED 
|      | NULL     | 2592000 |
    | 11156 | btest     | 2024-12-10 21:07:07.575 | 2024-12-10 21:07:08.901 | 
btest     | 11157   | 11147         | 1:70379151    | 1018          | FINISHED 
|      | NULL     | 2592000 |
    | 11178 | ctest     | 2024-12-10 21:10:52.656 | 2024-12-10 21:10:53.931 | 
ctest     | 11179   | 11162         | 1:1462395050  | 1022          | FINISHED 
|      | NULL     | 2592000 |
    | 11189 | dtest     | 2024-12-10 21:22:17.019 | 2024-12-10 21:22:18.029 | 
dtest     | 11190   | 11184         | 1:1194829729  | 1024          | FINISHED 
|      | NULL     | 2592000 |
    | 11200 | etest     | 2024-12-10 21:30:06.902 | 2024-12-10 21:30:08.094 | 
etest     | 11201   | 11195         | 1:18626462    | 1026          | FINISHED 
|      | NULL     | 2592000 |
    
+-------+-----------+-------------------------+-------------------------+-----------+---------+---------------+---------------+---------------+----------+------+----------+---------+
    5 rows in set (0.10 sec)
    
    mysql> select * from etest;
    ERROR 1105 (HY000): errCode = 2, detailMessage = There is no scanNode 
Backend available.[10048: not alive]
    ```
    
    BE core dump:
    ```
    StdoutLogger 2024-12-10 21:09:52,967 Start time: 2024年 12月 10日 星期二 21:09:52 
CST
    INFO: java_cmd /mnt/disk2/wangqiannan/tool/jdk-17.0.10/bin/java
    INFO: jdk_version 17
    Java HotSpot(TM) 64-Bit Server VM warning: Option CriticalJNINatives was 
deprecated in version 16.0 and will likely be removed in a future release.
    SLF4J: Class path contains multiple SLF4J bindings.
    SLF4J: Found binding in 
[jar:file:/mnt/disk2/wangqiannan/amory/master/doris/output/be/lib/java_extensions/preload-extensions/preload-extensions-jar-with-dependencies.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    SLF4J: Found binding in 
[jar:file:/mnt/disk2/wangqiannan/amory/master/doris/output/be/lib/java_extensions/java-udf/java-udf-jar-with-dependencies.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    SLF4J: Found binding in 
[jar:file:/mnt/disk2/wangqiannan/amory/master/doris/output/be/lib/hadoop_hdfs/common/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an 
explanation.
    SLF4J: Actual binding is of type [org.slf4j.impl.Reload4jLoggerFactory]
    [WARNING!] /sys/kernel/mm/transparent_hugepage/enabled: [always] madvise 
never, Doris not recommend turning on THP, which may cause the BE process to 
use more memory and cannot be freed in time. Turn off THP: `echo madvise | sudo 
tee /sys/kernel/mm/transparent_hugepage/enabled`
    *** Query id: 5a013c428ba942dd-afd4481a8e7ec4b3 ***
    *** is nereids: 1 ***
    *** tablet id: 0 ***
    *** Aborted at 1733836267 (unix time) try "date -d @1733836267" if you are 
using GNU date ***
    *** Current BE git commitID: 443e87e203 ***
    *** SIGSEGV address not mapped to object (@0x0) received by PID 3270216 
(TID 3276799 OR 0x7f0aae76a700) from PID 0; stack trace: ***
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/mnt/disk2/wangqiannan/amory/master/doris/be/src/common/signal_handler.h:421
     1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in 
/mnt/disk2/wangqiannan/tool/jdk-17.0.10/lib/server/libjvm.so
     2# JVM_handle_linux_signal in 
/mnt/disk2/wangqiannan/tool/jdk-17.0.10/lib/server/libjvm.so
     3# 0x00007F10737305B0 in /lib64/libc.so.6
     4# doris::segment_v2::ArrayFileColumnIterator::next_batch(unsigned long*, 
COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn>&, 
bool*) at 
/mnt/disk2/wangqiannan/amory/master/doris/be/src/olap/rowset/segment_v2/column_reader.cpp:999
     5# doris::segment_v2::ArrayFileColumnIterator::read_by_rowids(unsigned int 
const*, unsigned long, 
COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn>&) at 
/mnt/disk2/wangqiannan/amory/master/doris/be/src/olap/rowset/segment_v2/column_reader.cpp:1012
     6# 
doris::segment_v2::SegmentIterator::_read_columns_by_rowids(std::vector<unsigned
 int, std::allocator<unsigned int> >&, std::vector<unsigned int, 
std::allocator<unsigned int> >&, unsigned short*, unsigned long, 
std::vector<COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn>,
 
std::allocator<COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn>
 > >*) in /mnt/disk2/wangqiannan/amory/master/doris/output/be/lib/doris_be
     7# 
doris::segment_v2::SegmentIterator::_next_batch_internal(doris::vectorized::Block*)
 at 
/mnt/disk2/wangqiannan/amory/master/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2222
     8# 
doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*) at 
/mnt/disk2/wangqiannan/amory/master/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:1908
     9# 
doris::segment_v2::LazyInitSegmentIterator::next_batch(doris::vectorized::Block*)
 at 
/mnt/disk2/wangqiannan/amory/master/doris/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.h:45
    10# doris::BetaRowsetReader::next_block(doris::vectorized::Block*) at 
/mnt/disk2/wangqiannan/amory/master/doris/be/src/olap/rowset/beta_rowset_reader.cpp:342
    11# 
doris::vectorized::VCollectIterator::Level0Iterator::refresh_current_row() at 
/mnt/disk2/wangqiannan/amory/master/doris/be/src/vec/olap/vcollect_iterator.cpp:508
    12# 
doris::vectorized::VCollectIterator::Level0Iterator::ensure_first_row_ref() at 
/mnt/disk2/wangqiannan/amory/master/doris/be/src/vec/olap/vcollect_iterator.cpp:483
    13# 
doris::vectorized::VCollectIterator::Level1Iterator::ensure_first_row_ref() in 
/mnt/disk2/wangqiannan/amory/master/doris/output/be/lib/doris_be
    14# 
doris::vectorized::VCollectIterator::build_heap(std::vector<std::shared_ptr<doris::RowsetReader>,
 std::allocator<std::shared_ptr<doris::RowsetReader> > >&) at 
/mnt/disk2/wangqiannan/amory/master/doris/be/src/vec/olap/vcollect_iterator.cpp:186
    ```
---
 be/src/olap/rowset/segment_v2/column_reader.cpp    | 42 +++++++++--
 .../ddl/create_nestedtypes_with_schemachange.out   | 84 ++++++++++++++++++++++
 .../create_nestedtypes_with_schemachange.groovy    | 57 +++++++++++++++
 3 files changed, 177 insertions(+), 6 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp 
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index b96cf4f7e67..9d5328de869 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -871,8 +871,18 @@ Status MapFileColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnPtr
         size_t num_read = *n;
         auto null_map_ptr =
                 
static_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column_ptr();
-        bool null_signs_has_null = false;
-        RETURN_IF_ERROR(_null_iterator->next_batch(&num_read, null_map_ptr, 
&null_signs_has_null));
+        // in not-null to null linked-schemachange mode,
+        // actually we do not change dat data include meta in footer,
+        // so may dst from changed meta which is nullable but old data is not 
nullable,
+        // if so, we should set null_map to all null by default
+        if (_null_iterator) {
+            bool null_signs_has_null = false;
+            RETURN_IF_ERROR(
+                    _null_iterator->next_batch(&num_read, null_map_ptr, 
&null_signs_has_null));
+        } else {
+            auto& null_map = 
assert_cast<vectorized::ColumnUInt8&>(*null_map_ptr);
+            null_map.insert_many_vals(0, num_read);
+        }
         DCHECK(num_read == *n);
     }
     return Status::OK();
@@ -932,8 +942,18 @@ Status StructFileColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumn
         size_t num_read = *n;
         auto null_map_ptr =
                 
static_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column_ptr();
-        bool null_signs_has_null = false;
-        RETURN_IF_ERROR(_null_iterator->next_batch(&num_read, null_map_ptr, 
&null_signs_has_null));
+        // in not-null to null linked-schemachange mode,
+        // actually we do not change dat data include meta in footer,
+        // so may dst from changed meta which is nullable but old data is not 
nullable,
+        // if so, we should set null_map to all null by default
+        if (_null_iterator) {
+            bool null_signs_has_null = false;
+            RETURN_IF_ERROR(
+                    _null_iterator->next_batch(&num_read, null_map_ptr, 
&null_signs_has_null));
+        } else {
+            auto& null_map = 
assert_cast<vectorized::ColumnUInt8&>(*null_map_ptr);
+            null_map.insert_many_vals(0, num_read);
+        }
         DCHECK(num_read == *n);
     }
 
@@ -1086,8 +1106,18 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, 
vectorized::MutableColumnP
         auto null_map_ptr =
                 
static_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column_ptr();
         size_t num_read = *n;
-        bool null_signs_has_null = false;
-        RETURN_IF_ERROR(_null_iterator->next_batch(&num_read, null_map_ptr, 
&null_signs_has_null));
+        // in not-null to null linked-schemachange mode,
+        // actually we do not change dat data include meta in footer,
+        // so may dst from changed meta which is nullable but old data is not 
nullable,
+        // if so, we should set null_map to all null by default
+        if (_null_iterator) {
+            bool null_signs_has_null = false;
+            RETURN_IF_ERROR(
+                    _null_iterator->next_batch(&num_read, null_map_ptr, 
&null_signs_has_null));
+        } else {
+            auto& null_map = 
assert_cast<vectorized::ColumnUInt8&>(*null_map_ptr);
+            null_map.insert_many_vals(0, num_read);
+        }
         DCHECK(num_read == *n);
     }
 
diff --git 
a/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
 
b/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
index af88eb6b9bb..5b811095fcb 100644
--- 
a/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
+++ 
b/regression-test/data/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.out
@@ -44,3 +44,87 @@ col3 array<int>      Yes     false   \N      NONE
 col4   map<int,int>    Yes     false   \N      NONE
 col5   struct<f1:int>  Yes     false   \N      NONE
 
+-- !sql_before --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql --
+col0   bigint  No      true    \N      
+col2   int     No      false   \N      NONE
+col3   array<int>      Yes     false   \N      NONE
+col4   map<int,int>    Yes     false   \N      NONE
+col5   struct<f1:int>  Yes     false   \N      NONE
+col6   variant Yes     false   \N      NONE
+
+-- !sql_after --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql_before --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql --
+col0   bigint  No      true    \N      
+col2   int     No      false   \N      NONE
+col3   array<int>      Yes     false   \N      NONE
+col4   map<int,int>    No      false   \N      NONE
+col5   struct<f1:int>  No      false   \N      NONE
+col6   variant No      false   \N      NONE
+
+-- !sql_after --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql_before --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql --
+col0   bigint  No      true    \N      
+col2   int     No      false   \N      NONE
+col3   array<int>      Yes     false   \N      NONE
+col4   map<int,int>    Yes     false   \N      NONE
+col5   struct<f1:int>  Yes     false   \N      NONE
+col6   variant Yes     false   \N      NONE
+
+-- !sql_after --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql_before --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql --
+col0   bigint  No      true    \N      
+col2   int     No      false   \N      NONE
+col3   array<int>      No      false   \N      NONE
+col4   map<int,int>    Yes     false   \N      NONE
+col5   struct<f1:int>  No      false   \N      NONE
+col6   variant No      false   \N      NONE
+
+-- !sql_after --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql_before --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql --
+col0   bigint  No      true    \N      
+col2   int     No      false   \N      NONE
+col3   array<int>      Yes     false   \N      NONE
+col4   map<int,int>    Yes     false   \N      NONE
+col5   struct<f1:int>  Yes     false   \N      NONE
+col6   variant Yes     false   \N      NONE
+
+-- !sql_after --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql_before --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
+-- !sql --
+col0   bigint  No      true    \N      
+col2   int     No      false   \N      NONE
+col3   array<int>      No      false   \N      NONE
+col4   map<int,int>    No      false   \N      NONE
+col5   struct<f1:int>  Yes     false   \N      NONE
+col6   variant No      false   \N      NONE
+
+-- !sql_after --
+1      2       [1, 2]  {1:2}   {"f1":1}        {"a":[1,2,3]}
+
diff --git 
a/regression-test/suites/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.groovy
 
b/regression-test/suites/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.groovy
index ec0f163821d..62b9ab3bb56 100644
--- 
a/regression-test/suites/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.groovy
+++ 
b/regression-test/suites/datatype_p0/nested_types/ddl/create_nestedtypes_with_schemachange.groovy
@@ -62,4 +62,61 @@ suite("create_nestedtypes_with_schemachange", "p0") {
     // struct with other type
     create_nested_table_and_schema_change.call("test_struct_schemachange_1", 
"STRUCT<f1: varchar(1)>", "col5", "errCode = 2");
 
+    def create_nested_table_and_schema_change_null_trans = {testTablex, 
nested_type, column_name, notNull2Null ->
+        def null_define = "NULL"
+        if (notNull2Null) {
+            null_define = "NOT NULL"
+        }
+        // create basic type
+        sql "DROP TABLE IF EXISTS $testTablex"
+        sql """ CREATE TABLE IF NOT EXISTS $testTablex (
+                     col0 BIGINT NOT NULL,
+                     col2 int NOT NULL,
+                     col3 array<int> $null_define,
+                     col4 map<int, int> $null_define,
+                     col5 struct<f1: int> $null_define,
+                     col6 variant $null_define
+                )
+                /* mow */
+                UNIQUE KEY(col0) DISTRIBUTED BY HASH(col0) BUCKETS 4 
PROPERTIES (
+                  "enable_unique_key_merge_on_write" = "true",
+                  "replication_num" = "1",
+                  'light_schema_change' = 'true', 
'disable_auto_compaction'='true'
+                ); """
+        // insert data
+        sql """ INSERT INTO $testTablex VALUES (1, 2, array(1, 2), map(1, 2), 
named_struct('f1', 1), '{"a": [1,2,3]}')"""
+        // select
+        qt_sql_before "select * from $testTablex"
+
+        if (notNull2Null) {
+            sql "ALTER TABLE $testTablex MODIFY COLUMN $column_name 
$nested_type NULL"
+            waitForSchemaChangeDone {
+                sql """ SHOW ALTER TABLE COLUMN WHERE IndexName='$testTablex' 
ORDER BY createtime DESC LIMIT 1 """
+                time 600
+            }
+        } else {
+            // schema change from null to non-nullable is not supported
+            test {
+               sql "ALTER TABLE $testTablex MODIFY COLUMN $column_name 
$nested_type NOT NULL"
+               exception "Can not change from nullable to non-nullable"
+            }
+        }
+        // desc table
+        qt_sql "DESC $testTablex"
+        qt_sql_after "select * from $testTablex"
+    }
+
+    // array
+    
create_nested_table_and_schema_change_null_trans.call("test_array_schemachange_null",
 "ARRAY<INT>", "col3", false)
+    
create_nested_table_and_schema_change_null_trans.call("test_array_schemachange_null1",
 "ARRAY<INT>", "col3", true)
+    // map
+    
create_nested_table_and_schema_change_null_trans.call("test_map_schemachange_null",
 "Map<INT, INT>", "col4", false)
+    
create_nested_table_and_schema_change_null_trans.call("test_map_schemachange_null1",
 "Map<INT, INT>", "col4", true)
+    // struct
+    
create_nested_table_and_schema_change_null_trans.call("test_struct_schemachange_null",
 "struct<f1: int>", "col5", false)
+    
create_nested_table_and_schema_change_null_trans.call("test_struct_schemachange_null1",
 "struct<f1: int>", "col5", true)
+    // variant
+    // 
create_nested_table_and_schema_change_null_trans.call("test_v_schemachange_null",
 "variant", "col6", false)
+    // 
create_nested_table_and_schema_change_null_trans.call("test_v_schemachange_null1",
 "variant", "col6", true)
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to