This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1abda1c446 [Fix](merge-on-write) Correct the alignment process when 
the existing rows with same key has marked delete sign (#24877)
1abda1c446 is described below

commit 1abda1c4463fce757043e5c6c88c4c77ce72b3e1
Author: bobhan1 <bh2444151...@outlook.com>
AuthorDate: Tue Sep 26 16:09:20 2023 +0800

    [Fix](merge-on-write) Correct the alignment process when the existing rows 
with same key has marked delete sign (#24877)
---
 be/src/olap/rowset/segment_v2/segment_writer.cpp   | 22 ++++++-
 .../test_partial_update_delete_sign.out            | 16 +++++
 .../test_partial_update_delete_sign_data.csv       |  1 +
 .../test_partial_update_delete_sign.groovy         | 68 ++++++++++++++++++++++
 4 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp 
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 1bfac900d2..988b39f02c 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -585,7 +585,17 @@ Status 
SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f
     // build default value columns
     auto default_value_block = old_value_block.clone_empty();
     auto mutable_default_value_columns = default_value_block.mutate_columns();
-    if (has_default_or_nullable) {
+
+    const vectorized::Int8* delete_sign_column_data = nullptr;
+    if (const vectorized::ColumnWithTypeAndName* delete_sign_column =
+                old_value_block.try_get_by_name(DELETE_SIGN);
+        delete_sign_column != nullptr && _tablet_schema->has_sequence_col()) {
+        auto& delete_sign_col =
+                reinterpret_cast<const 
vectorized::ColumnInt8&>(*(delete_sign_column->column));
+        delete_sign_column_data = delete_sign_col.get_data().data();
+    }
+
+    if (has_default_or_nullable || delete_sign_column_data != nullptr) {
         for (auto i = 0; i < cids_missing.size(); ++i) {
             const auto& column = _tablet_schema->column(cids_missing[i]);
             if (column.has_default_value()) {
@@ -600,7 +610,15 @@ Status 
SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_f
 
     // fill all missing value from mutable_old_columns, need to consider 
default value and null value
     for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) {
-        if (use_default_or_null_flag[idx]) {
+        // `use_default_or_null_flag[idx] == false` doesn't mean that we should 
read values from the old row
+        // for the missing columns. For example, if a table has sequence 
column, the rows with DELETE_SIGN column
+        // marked will not be marked in delete bitmap (see 
https://github.com/apache/doris/pull/24011), so it will
+        // be found in Tablet::lookup_row_key() and 
`use_default_or_null_flag[idx]` will be false. But we should not
+        // read values from old rows for missing values in this case. So 
we should read the DELETE_SIGN column
+        // to check if a row REALLY exists in the table.
+        if (use_default_or_null_flag[idx] ||
+            (delete_sign_column_data != nullptr &&
+             delete_sign_column_data[read_index[idx + segment_start_pos]] != 
0)) {
             for (auto i = 0; i < cids_missing.size(); ++i) {
                 // if the column has default value, fill it with default value
                 // otherwise, if the column is nullable, fill it with null 
value
diff --git 
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out
 
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out
index 0f1cd09e7d..eb0a501090 100644
--- 
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out
+++ 
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.out
@@ -52,3 +52,19 @@
 4      4       4       4       4       0
 5      \N      \N      \N      \N      1
 
+-- !1 --
+1      1       1
+
+-- !2 --
+
+-- !3 --
+1      2       \N
+
+-- !1 --
+1      1       1       1
+
+-- !2 --
+
+-- !3 --
+1      2       \N      \N
+
diff --git 
a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign_data.csv
 
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign_data.csv
new file mode 100644
index 0000000000..d72f2010a8
--- /dev/null
+++ 
b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete_sign_data.csv
@@ -0,0 +1 @@
+1,2
diff --git 
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.groovy
 
b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.groovy
index f8369f0ed3..013ca81956 100644
--- 
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.groovy
+++ 
b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete_sign.groovy
@@ -116,4 +116,72 @@ suite('test_partial_update_delete_sign') {
     // skip_delete_bitmap=false, skip_delete_sign=true
     qt_2 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName2} order 
by k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__;"
     sql "drop table if exists ${tableName2};"
+
+
+    // partial update a row that has been deleted by delete sign (table without 
sequence column)
+    sql "set skip_delete_sign=false;"
+    sql "set skip_storage_engine_merge=false;"
+    sql "set skip_delete_bitmap=false;"
+    sql "sync"
+    def tableName3 = "test_partial_update_delete_sign3"
+    sql "DROP TABLE IF EXISTS ${tableName3};"
+    sql """ create table ${tableName3} (
+        k int,
+        v1 int,
+        v2 int
+    ) ENGINE=OLAP unique key (k)
+    distributed by hash(k) buckets 1
+    properties("replication_num" = "1",
+    "enable_unique_key_merge_on_write" = "true"); """
+    sql "insert into ${tableName3} values(1,1,1);"
+    qt_1 "select * from ${tableName3} order by k;"
+    sql "insert into ${tableName3}(k,v1,v2,__DORIS_DELETE_SIGN__) 
values(1,1,1,1);"
+    qt_2 "select * from ${tableName3} order by k;"
+    streamLoad {
+        table "${tableName3}"
+
+        set 'column_separator', ','
+        set 'format', 'csv'
+        set 'partial_columns', 'true'
+        set 'columns', 'k,v1'
+
+        file 'test_partial_update_delete_sign_data.csv'
+        time 10000 // limit inflight 10s
+    }
+    sql "sync"
+    qt_3 "select * from ${tableName3} order by k;"
+    sql "drop table if exists ${tableName3};"
+
+
+    // partial update a row that has been deleted by delete sign (table with 
sequence column)
+    def tableName4 = "test_partial_update_delete_sign4"
+    sql "DROP TABLE IF EXISTS ${tableName4};"
+    sql """ create table ${tableName4} (
+        k int,
+        v1 int,
+        v2 int,
+        c int
+    ) ENGINE=OLAP unique key (k)
+    distributed by hash(k) buckets 1
+    properties("replication_num" = "1",
+    "enable_unique_key_merge_on_write" = "true",
+    "function_column.sequence_col" = "c"); """
+    sql "insert into ${tableName4} values(1,1,1,1);"
+    qt_1 "select * from ${tableName4} order by k;"
+    sql "insert into ${tableName4}(k,v1,v2,c,__DORIS_DELETE_SIGN__) 
values(1,1,1,1,1);"
+    qt_2 "select * from ${tableName4} order by k;"
+    streamLoad {
+        table "${tableName4}"
+
+        set 'column_separator', ','
+        set 'format', 'csv'
+        set 'partial_columns', 'true'
+        set 'columns', 'k,v1'
+
+        file 'test_partial_update_delete_sign_data.csv'
+        time 10000 // limit inflight 10s
+    }
+    sql "sync"
+    qt_3 "select * from ${tableName4} order by k;"
+    sql "drop table if exists ${tableName4};"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to