This is an automated email from the ASF dual-hosted git repository. zhangchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 1abda1c446 [Fix](merge-on-write) Correct the alignment process when the existing rows with same key has marked delete sign (#24877) 1abda1c446 is described below commit 1abda1c4463fce757043e5c6c88c4c77ce72b3e1 Author: bobhan1 <bh2444151...@outlook.com> AuthorDate: Tue Sep 26 16:09:20 2023 +0800 [Fix](merge-on-write) Correct the alignment process when the existing rows with same key has marked delete sign (#24877) --- be/src/olap/rowset/segment_v2/segment_writer.cpp | 22 ++++++- .../test_partial_update_delete_sign.out | 16 +++++ .../test_partial_update_delete_sign_data.csv | 1 + .../test_partial_update_delete_sign.groovy | 68 ++++++++++++++++++++++ 4 files changed, 105 insertions(+), 2 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 1bfac900d2..988b39f02c 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -585,7 +585,17 @@ Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f // build default value columns auto default_value_block = old_value_block.clone_empty(); auto mutable_default_value_columns = default_value_block.mutate_columns(); - if (has_default_or_nullable) { + + const vectorized::Int8* delete_sign_column_data = nullptr; + if (const vectorized::ColumnWithTypeAndName* delete_sign_column = + old_value_block.try_get_by_name(DELETE_SIGN); + delete_sign_column != nullptr && _tablet_schema->has_sequence_col()) { + auto& delete_sign_col = + reinterpret_cast<const vectorized::ColumnInt8&>(*(delete_sign_column->column)); + delete_sign_column_data = delete_sign_col.get_data().data(); + } + + if (has_default_or_nullable || delete_sign_column_data != nullptr) { for (auto i = 0; i < cids_missing.size(); ++i) { const auto& column = _tablet_schema->column(cids_missing[i]); if (column.has_default_value()) { @@ -600,7 +610,15 @@ Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f // fill all missing value from mutable_old_columns, need to consider default value and null value for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) { - if (use_default_or_null_flag[idx]) { + // `use_default_or_null_flag[idx] == true` doesn't mean that we should read values from the old row + // for the missing columns. For example, if a table has sequence column, the rows with DELETE_SIGN column + // marked will not be marked in delete bitmap(see https://github.com/apache/doris/pull/24011), so it will + // be found in Tablet::lookup_row_key() and `use_default_or_null_flag[idx]` will be false. But we should not + // read values from old rows for missing values in this occasion. So we should read the DELETE_SIGN column + // to check if a row REALLY exists in the table. + if (use_default_or_null_flag[idx] || + (delete_sign_column_data != nullptr && + delete_sign_column_data[read_index[idx + segment_start_pos]] != 0)) { for (auto i = 0; i < cids_missing.size(); ++i) { // if the column has default value, fiil it with default value // otherwise, if the column is nullable, fill it with null value diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out index 0f1cd09e7d..eb0a501090 100644 --- a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out +++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out @@ -52,3 +52,19 @@ 4 4 4 4 4 0 5 \N \N \N \N 1 +-- !1 -- +1 1 1 + +-- !2 -- + +-- !3 -- +1 2 \N + +-- !1 -- +1 1 1 1 + +-- !2 -- + +-- !3 -- +1 2 \N \N + diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign_data.csv b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign_data.csv new file mode 100644 index 0000000000..d72f2010a8 --- /dev/null +++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign_data.csv @@ -0,0 +1 @@ +1,2 diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.groovy index f8369f0ed3..013ca81956 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.groovy @@ -116,4 +116,72 @@ suite('test_partial_update_delete_sign') { // skip_delete_bitmap=false, skip_delete_sign=true qt_2 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName2} order by k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__;" sql "drop table if exists ${tableName2};" + + + // partial update a row that has been deleted by delete sign(table without sequence column) + sql "set skip_delete_sign=false;" + sql "set skip_storage_engine_merge=false;" + sql "set skip_delete_bitmap=false;" + sql "sync" + def tableName3 = "test_partial_update_delete_sign3" + sql "DROP TABLE IF EXISTS ${tableName3};" + sql """ create table ${tableName3} ( + k int, + v1 int, + v2 int + ) ENGINE=OLAP unique key (k) + distributed by hash(k) buckets 1 + properties("replication_num" = "1", + "enable_unique_key_merge_on_write" = "true"); """ + sql "insert into ${tableName3} values(1,1,1);" + qt_1 "select * from ${tableName3} order by k;" + sql "insert into ${tableName3}(k,v1,v2,__DORIS_DELETE_SIGN__) values(1,1,1,1);" + qt_2 "select * from ${tableName3} order by k;" + streamLoad { + table "${tableName3}" + + set 'column_separator', ',' + set 'format', 'csv' + set 'partial_columns', 'true' + set 'columns', 'k,v1' + + file 'test_partial_update_delete_sign_data.csv' + time 10000 // limit inflight 10s + } + sql "sync" + qt_3 "select * from ${tableName3} order by k;" + sql "drop table if exists ${tableName3};" + + + // partial update a row that has been deleted by delete sign(table with sequence column) + def tableName4 = "test_partial_update_delete_sign4" + sql "DROP TABLE IF EXISTS ${tableName4};" + sql """ create table ${tableName4} ( + k int, + v1 int, + v2 int, + c int + ) ENGINE=OLAP unique key (k) + distributed by hash(k) buckets 1 + properties("replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "function_column.sequence_col" = "c"); """ + sql "insert into ${tableName4} values(1,1,1,1);" + qt_1 "select * from ${tableName4} order by k;" + sql "insert into ${tableName4}(k,v1,v2,c,__DORIS_DELETE_SIGN__) values(1,1,1,1,1);" + qt_2 "select * from ${tableName4} order by k;" + streamLoad { + table "${tableName4}" + + set 'column_separator', ',' + set 'format', 'csv' + set 'partial_columns', 'true' + set 'columns', 'k,v1' + + file 'test_partial_update_delete_sign_data.csv' + time 10000 // limit inflight 10s + } + sql "sync" + qt_3 "select * from ${tableName4} order by k;" + sql "drop table if exists ${tableName4};" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org