This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 2abbc7898a3 [Opt](cloud) Add inject points for cloud mow (#48190)
2abbc7898a3 is described below

commit 2abbc7898a3d2f06b89ba783afd875239a6558bd
Author: bobhan1 <bao...@selectdb.com>
AuthorDate: Tue Feb 25 22:07:38 2025 +0800

    [Opt](cloud) Add inject points for cloud mow (#48190)
---
 be/src/cloud/cloud_meta_mgr.cpp                    |  11 ++++
 be/src/cloud/cloud_schema_change_job.cpp           |  16 +++++
 .../cloud/test_cloud_mow_correctness_inject.out    | Bin 185 -> 368 bytes
 .../cloud/test_cloud_mow_correctness_inject.groovy |  71 ++++++++++++++++++++-
 4 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index afac47e3645..41e60b5e264 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -1183,6 +1183,17 @@ Status 
CloudMetaMgr::cloud_update_delete_bitmap_without_lock(const CloudTablet&
 
 Status CloudMetaMgr::get_delete_bitmap_update_lock(const CloudTablet& tablet, 
int64_t lock_id,
                                                    int64_t initiator) {
+    DBUG_EXECUTE_IF("get_delete_bitmap_update_lock.inject_fail", {
+        auto p = dp->param("percent", 0.01);
+        std::mt19937 gen {std::random_device {}()};
+        std::bernoulli_distribution inject_fault {p};
+        if (inject_fault(gen)) {
+            return Status::Error<ErrorCode::DELETE_BITMAP_LOCK_ERROR>(
+                    "injection error when get get_delete_bitmap_update_lock, "
+                    "tablet_id={}, lock_id={}, initiator={}",
+                    tablet.tablet_id(), lock_id, initiator);
+        }
+    });
     VLOG_DEBUG << "get_delete_bitmap_update_lock , tablet_id: " << 
tablet.tablet_id()
                << ",lock_id:" << lock_id;
     GetDeleteBitmapUpdateLockRequest req;
diff --git a/be/src/cloud/cloud_schema_change_job.cpp 
b/be/src/cloud/cloud_schema_change_job.cpp
index d12bcdaa01e..7c584d999bf 100644
--- a/be/src/cloud/cloud_schema_change_job.cpp
+++ b/be/src/cloud/cloud_schema_change_job.cpp
@@ -21,6 +21,7 @@
 
 #include <chrono>
 #include <memory>
+#include <random>
 #include <thread>
 
 #include "cloud/cloud_meta_mgr.h"
@@ -463,6 +464,9 @@ Status CloudSchemaChangeJob::_process_delete_bitmap(int64_t 
alter_version,
         }
     }
 
+    
DBUG_EXECUTE_IF("CloudSchemaChangeJob::_process_delete_bitmap.before_new_inc.block",
+                    DBUG_BLOCK);
+
     // step 2, process incremental rowset with delete bitmap update lock
     
RETURN_IF_ERROR(_cloud_storage_engine.meta_mgr().get_delete_bitmap_update_lock(
             *_new_tablet, SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID, initiator));
@@ -484,6 +488,18 @@ Status 
CloudSchemaChangeJob::_process_delete_bitmap(int64_t alter_version,
         }
     }
 
+    
DBUG_EXECUTE_IF("CloudSchemaChangeJob::_process_delete_bitmap.inject_sleep", {
+        auto p = dp->param("percent", 0.01);
+        auto sleep_time = dp->param("sleep", 100);
+        std::mt19937 gen {std::random_device {}()};
+        std::bernoulli_distribution inject_fault {p};
+        if (inject_fault(gen)) {
+            LOG_INFO("injection sleep for {} seconds, tablet_id={}, sc 
job_id={}", sleep_time,
+                     _new_tablet->tablet_id(), _job_id);
+            std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
+        }
+    });
+
     auto& delete_bitmap = tmp_tablet->tablet_meta()->delete_bitmap();
 
     // step4, store delete bitmap
diff --git 
a/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out
 
b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out
index 79839efff32..57619853130 100644
Binary files 
a/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out
 and 
b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out
 differ
diff --git 
a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy
 
b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy
index 3c6ce3e8294..fa447e131d9 100644
--- 
a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy
+++ 
b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy
@@ -39,8 +39,7 @@ suite("test_cloud_mow_correctness_inject", "nonConcurrent") {
         PROPERTIES (
             "enable_mow_light_delete" = "false",
             "enable_unique_key_merge_on_write" = "true",
-            "disable_auto_compaction" = "true",
-            "replication_num" = "1"); """
+            "disable_auto_compaction" = "true"); """
 
     sql "insert into ${table1} values(1,1,1);"
     sql "insert into ${table1} values(2,2,2);"
@@ -48,10 +47,22 @@ suite("test_cloud_mow_correctness_inject", "nonConcurrent") 
{
     sql "sync;"
     qt_sql "select * from ${table1} order by k1;"
 
+    def waitForSC = {
+        Awaitility.await().atMost(30, TimeUnit.SECONDS).pollDelay(100, 
TimeUnit.MILLISECONDS).pollInterval(1000, TimeUnit.MILLISECONDS).until(() -> {
+            def res = sql_return_maparray "SHOW ALTER TABLE COLUMN WHERE 
TableName='${table1}' ORDER BY createtime DESC LIMIT 1"
+            assert res.size() == 1
+            if (res[0].State == "FINISHED" || res[0].State == "CANCELLED") {
+                return true;
+            }
+            return false;
+        });
+    }
+
     def customFeConfig = [
         delete_bitmap_lock_expiration_seconds : 10,
         calculate_delete_bitmap_task_timeout_seconds : 2,
-        mow_calculate_delete_bitmap_retry_times : 3
+        mow_calculate_delete_bitmap_retry_times : 3,
+        enable_schema_change_retry_in_cloud_mode : false // turn off retries to shorten the test's running time
     ]
 
     setFeConfigTemporary(customFeConfig) {
@@ -90,5 +101,59 @@ suite("test_cloud_mow_correctness_inject", "nonConcurrent") 
{
             GetDebugPoint().clearDebugPointsForAllBEs()
         }
 
+
+        try {
+            
GetDebugPoint().enableDebugPointForAllBEs("get_delete_bitmap_update_lock.inject_fail",
 [percent: "1.0"])
+            
GetDebugPoint().enableDebugPointForAllBEs("CloudSchemaChangeJob.process_alter_tablet.sleep")
+            sql "alter table ${table1} modify column c2 varchar(100);"
+            Thread.sleep(1000)
+            sql "insert into ${table1} values(10,10,10);"
+            qt_sql "select * from ${table1} order by k1;"
+            Thread.sleep(200)
+            
GetDebugPoint().disableDebugPointForAllBEs("CloudSchemaChangeJob.process_alter_tablet.sleep")
+
+            waitForSC()
+
+            def res = sql_return_maparray "SHOW ALTER TABLE COLUMN WHERE 
TableName='${table1}' ORDER BY createtime DESC LIMIT 1"
+            assert res[0].State == "CANCELLED"
+            assert res[0].Msg.contains("injection error when get 
get_delete_bitmap_update_lock")
+
+            qt_sql "select * from ${table1} order by k1;"
+        } catch(Exception e) {
+            logger.info(e.getMessage())
+            throw e
+        } finally {
+            GetDebugPoint().clearDebugPointsForAllBEs()
+        }
+
+
+        try {
+            // sleep long enough for sc's delete bitmap lock to expire
+            
GetDebugPoint().enableDebugPointForAllBEs("CloudSchemaChangeJob::_process_delete_bitmap.inject_sleep",
 [percent: "1.0", sleep: "20"])
+            
GetDebugPoint().enableDebugPointForAllBEs("CloudSchemaChangeJob::_process_delete_bitmap.before_new_inc.block")
+            sql "alter table ${table1} modify column c2 varchar(100);"
+            Thread.sleep(3000)
+            sql "insert into ${table1} values(11,11,11);"
+            qt_sql "select * from ${table1} order by k1;"
+            Thread.sleep(1000)
+            
GetDebugPoint().disableDebugPointForAllBEs("CloudSchemaChangeJob::_process_delete_bitmap.before_new_inc.block")
+
+            // wait until sc's delete bitmap lock has expired
+            Thread.sleep(10000)
+            sql "insert into ${table1} values(12,12,12);"
+
+            waitForSC()
+
+            def res = sql_return_maparray "SHOW ALTER TABLE COLUMN WHERE 
TableName='${table1}' ORDER BY createtime DESC LIMIT 1"
+            assert res[0].State == "CANCELLED"
+            assert res[0].Msg.contains("[DELETE_BITMAP_LOCK_ERROR]lock expired 
when update delete bitmap")
+
+            qt_sql "select * from ${table1} order by k1;"
+        } catch(Exception e) {
+            logger.info(e.getMessage())
+            throw e
+        } finally {
+            GetDebugPoint().clearDebugPointsForAllBEs()
+        }
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to