This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c972ea55bf8 [fix](merge-on-write) when full clone failed, duplicate 
key might occur (#37001)
c972ea55bf8 is described below

commit c972ea55bf8c405df2c2dbf0ec02d8a1717a76ed
Author: zhannngchen <48427519+zhannngc...@users.noreply.github.com>
AuthorDate: Tue Jul 2 09:53:49 2024 +0800

    [fix](merge-on-write) when full clone failed, duplicate key might occur 
(#37001)
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    introduced by #31268
    
    full clone failure may produce duplicate keys in mow table
    the bug would be triggered in the following condition:
    1. replica 0 miss version
    2. replica 0 try to do full clone from other replicas
    3. the full clone failed and the delete bitmap is overrided incorrectly
    4. replica 0 try to do incremental clone again and this time the clone
    succeed
    5. incremental clone can't fix the delete bitmap overrided by previous
    failed full clone
    6. duplicate key occurred
    
    solution:
    for full clone, don't override the delete bitmap, use `merge()` method
    instead.
---
 be/src/olap/snapshot_manager.cpp                   |   9 ++
 be/src/olap/tablet.cpp                             |   8 ++
 be/src/olap/task/engine_clone_task.cpp             |   2 +-
 .../test_mow_full_clone_exception.out              |  37 ++++++
 .../test_mow_full_clone_exception.groovy           | 137 +++++++++++++++++++++
 5 files changed, 192 insertions(+), 1 deletion(-)

diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index eab49997599..e2f117b54e2 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -451,6 +451,15 @@ Status SnapshotManager::_create_snapshot_files(const 
TabletSharedPtr& ref_tablet
                 }
             }
 
+            
DBUG_EXECUTE_IF("SnapshotManager.create_snapshot_files.allow_inc_clone", {
+                auto tablet_id = dp->param("tablet_id", 0);
+                auto is_full_clone = dp->param("is_full_clone", false);
+                if (ref_tablet->tablet_id() == tablet_id && is_full_clone) {
+                    LOG(INFO) << "injected full clone for tabelt: " << 
tablet_id;
+                    res = Status::InternalError("fault injection error");
+                }
+            });
+
             // be would definitely set it as true no matter has missed version 
or not, we could
             // just check whether the missed version is empty or not
             int64_t version = -1;
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index d49d56ef2d3..8b6ebc2c395 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -404,6 +404,14 @@ Status Tablet::revise_tablet_meta(const 
std::vector<RowsetSharedPtr>& to_add,
         break; // while (keys_type() == UNIQUE_KEYS && 
enable_unique_key_merge_on_write())
     }
 
+    DBUG_EXECUTE_IF("Tablet.revise_tablet_meta_fail", {
+        auto ptablet_id = dp->param("tablet_id", 0);
+        if (tablet_id() == ptablet_id) {
+            LOG(INFO) << "injected revies_tablet_meta failure for tabelt: " << 
ptablet_id;
+            calc_bm_status = Status::InternalError("fault injection error");
+        }
+    });
+
     // error handling
     if (!calc_bm_status.ok()) {
         if (is_incremental_clone) {
diff --git a/be/src/olap/task/engine_clone_task.cpp 
b/be/src/olap/task/engine_clone_task.cpp
index 590fdd1b4a7..a05a640dcfe 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -875,7 +875,7 @@ Status EngineCloneTask::_finish_full_clone(Tablet* tablet,
         }
     }
     if (tablet->enable_unique_key_merge_on_write()) {
-        tablet->tablet_meta()->delete_bitmap() = 
cloned_tablet_meta->delete_bitmap();
+        
tablet->tablet_meta()->delete_bitmap().merge(cloned_tablet_meta->delete_bitmap());
     }
     return tablet->revise_tablet_meta(to_add, to_delete, false);
     // TODO(plat1ko): write cooldown meta to remote if this replica is 
cooldown replica
diff --git 
a/regression-test/data/unique_with_mow_p0/test_mow_full_clone_exception.out 
b/regression-test/data/unique_with_mow_p0/test_mow_full_clone_exception.out
new file mode 100644
index 00000000000..f11c60b41e1
--- /dev/null
+++ b/regression-test/data/unique_with_mow_p0/test_mow_full_clone_exception.out
@@ -0,0 +1,37 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      10
+2      200
+3      30
+4      400
+5      500
+6      600
+7      7
+8      8
+9      9
+10     10
+
+-- !sql --
+1      10
+2      200
+3      30
+4      400
+5      500
+6      600
+7      7
+8      8
+9      9
+10     10
+
+-- !sql --
+1      10
+2      200
+3      30
+4      400
+5      500
+6      600
+7      7
+8      8
+9      9
+10     10
+
diff --git 
a/regression-test/suites/unique_with_mow_p0/test_mow_full_clone_exception.groovy
 
b/regression-test/suites/unique_with_mow_p0/test_mow_full_clone_exception.groovy
new file mode 100644
index 00000000000..c3fb567f258
--- /dev/null
+++ 
b/regression-test/suites/unique_with_mow_p0/test_mow_full_clone_exception.groovy
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.apache.doris.regression.suite.ClusterOptions
+import org.apache.doris.regression.util.NodeType
+
+// This case can reproduce an clone issue which may produce duplicate keys in 
mow table
+// the bug would be triggered in the following condition:
+// 1. replica 0 miss version
+// 2. replica 0 try to do full clone from other replicas
+// 3. the full clone failed and the delete bitmap is overrided incorrectly
+// 4. replica 0 try to do incremental clone again and this time the clone 
succeed
+// 5. incremental clone can't fix the delete bitmap overrided by previous 
failed full clone
+// 6. duplicate key occurred
+//
+// the bug is fixed in #37001
+
+suite('test_full_clone_exception') {
+    def options = new ClusterOptions()
+    options.feConfigs += [
+        'disable_tablet_scheduler=true',
+        'tablet_checker_interval_ms=500',
+        'schedule_batch_size=1000',
+        'schedule_slot_num_per_hdd_path=1000',
+    ]
+    options.beConfigs += [
+        'disable_auto_compaction=true',
+        'report_tablet_interval_seconds=1',
+    ]
+
+    options.enableDebugPoints()
+    options.cloudMode = false
+    docker(options) {
+        def txnFailureInject = 'TxnManager.prepare_txn.random_failed'
+        def fullCloneInject = 
'SnapshotManager.create_snapshot_files.allow_inc_clone'
+        def reviseTabletMetaInject='Tablet.revise_tablet_meta_fail'
+        def be1 = sql_return_maparray('show backends').get(0)
+        def be2 = sql_return_maparray('show backends').get(1)
+        def be3 = sql_return_maparray('show backends').get(2)
+
+        def addInjectToBE = { beIdx, injectName, param ->
+            def be = sql_return_maparray('show backends').get(beIdx)
+            GetDebugPoint().enableDebugPoint(be.Host, be.HttpPort as int, 
NodeType.BE, injectName, param)
+        }
+
+        def deleteInjectOnBE = { beIdx, injectName ->
+            def be = sql_return_maparray('show backends').get(beIdx)
+            GetDebugPoint().disableDebugPoint(be.Host, be.HttpPort as int, 
NodeType.BE, injectName)
+        }
+
+        sql """
+                CREATE TABLE IF NOT EXISTS t (
+                    k int,
+                    v int
+                )
+                UNIQUE KEY(k)
+                DISTRIBUTED BY HASH(k) BUCKETS 1 properties(
+                    "enable_unique_key_merge_on_write" = "true"
+                );
+            """
+
+        sql 'INSERT INTO t VALUES(1,1),(2,2),(3,3),(4,4)'
+        sql 'INSERT INTO t VALUES(1,10),(2,20),(3,30),(4,40)'
+
+        // inject txn failure, make replica 0 miss version
+        addInjectToBE(0, txnFailureInject, [percent:1.0])
+
+        sql 'INSERT INTO t VALUES(2,200),(4,400),(5,500),(6,600)'
+        sql 'INSERT INTO t VALUES(7,7)'
+        sql 'INSERT INTO t VALUES(8,8)'
+        sql 'INSERT INTO t VALUES(9,9)'
+
+        deleteInjectOnBE(0, txnFailureInject)
+
+        sql 'INSERT INTO t VALUES(10,10)'
+
+        sleep 5000
+
+        // check replica 0 miss version
+        def replica1 = sql_return_maparray('show tablets from t').find { 
it.BackendId.toLong().equals(be1.BackendId.toLong()) }
+        assertNotNull(replica1)
+        assertEquals(3, replica1.VersionCount.toInteger())
+        assertEquals(3, replica1.Version.toInteger())
+        assertEquals(8, replica1.LstFailedVersion.toInteger())
+
+        def tabletId = replica1.TabletId
+        // inject failure on replica 0, which can't clone succeed
+        addInjectToBE(0, reviseTabletMetaInject, [tablet_id:tabletId])
+        // inject on replica 1, force replica 0 full clone from them
+        addInjectToBE(1, fullCloneInject, [tablet_id:tabletId, 
is_full_clone:true])
+        addInjectToBE(2, fullCloneInject, [tablet_id:tabletId, 
is_full_clone:true])
+
+        // start clone
+        setFeConfig('disable_tablet_scheduler', false)
+
+        sleep 10000
+
+        // now, there's lots of full clone failures, remove all debug points, 
make
+        // replica 0 can do normal incremental clone from other replicas
+        deleteInjectOnBE(0, reviseTabletMetaInject)
+        deleteInjectOnBE(1, fullCloneInject)
+        deleteInjectOnBE(2, fullCloneInject)
+
+        sleep 10000
+
+        // make sure the clone succeed
+        replica1 = sql_return_maparray('show tablets from t').find { 
it.BackendId.toLong().equals(be1.BackendId.toLong()) }
+        assertNotNull(replica1)
+        assertEquals(8, replica1.VersionCount.toInteger())
+        assertEquals(8, replica1.Version.toInteger())
+        assertEquals(-1, replica1.LstFailedVersion.toInteger())
+
+        // three replica's content should be consistent
+        sql 'set use_fix_replica=0'
+        qt_sql 'select * from t order by k'
+
+        sql 'set use_fix_replica=1'
+        qt_sql 'select * from t order by k'
+
+        sql 'set use_fix_replica=2'
+        qt_sql 'select * from t order by k'
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to