This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new eff1564cbc6 branch-3.0: [Opt](cloud) Add some injection points for mow (#47712) (#47849)
eff1564cbc6 is described below

commit eff1564cbc6867375e55f59ef10eb956200e4c5d
Author: bobhan1 <bao...@selectdb.com>
AuthorDate: Tue Feb 18 16:28:26 2025 +0800

    branch-3.0: [Opt](cloud) Add some injection points for mow (#47712) (#47849)
    
    pick https://github.com/apache/doris/pull/47712
---
 .../cloud/cloud_engine_calc_delete_bitmap_task.cpp |  14 +++
 be/src/olap/base_tablet.cpp                        |  14 +++
 .../cloud/test_cloud_mow_correctness_inject.out    | Bin 0 -> 185 bytes
 .../cloud/test_cloud_mow_correctness_inject.groovy |  94 +++++++++++++++++++++
 .../test_cloud_mow_partial_update_retry.groovy     |   2 +-
 ..._mow_stale_resp_load_compaction_conflict.groovy |   2 +-
 ..._cloud_mow_stale_resp_load_load_conflict.groovy |   2 +-
 7 files changed, 125 insertions(+), 3 deletions(-)

diff --git a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp
index 9de00993117..e85b160cf2f 100644
--- a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp
+++ b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp
@@ -20,6 +20,8 @@
 #include <fmt/format.h>
 
 #include <memory>
+#include <random>
+#include <thread>
 
 #include "cloud/cloud_meta_mgr.h"
 #include "cloud/cloud_tablet.h"
@@ -208,6 +210,18 @@ Status CloudTabletCalcDeleteBitmapTask::handle() const {
     }
 
     int64_t t3 = MonotonicMicros();
+    DBUG_EXECUTE_IF("CloudEngineCalcDeleteBitmapTask.handle.inject_sleep", {
+        auto p = dp->param("percent", 0.01);
+        // 100s > Config.calculate_delete_bitmap_task_timeout_seconds = 60s
+        auto sleep_time = dp->param("sleep", 100);
+        std::mt19937 gen {std::random_device {}()};
+        std::bernoulli_distribution inject_fault {p};
+        if (inject_fault(gen)) {
+            LOG_INFO("injection sleep for {} seconds, txn={}, tablet_id={}", 
sleep_time,
+                     _transaction_id, _tablet_id);
+            std::this_thread::sleep_for(std::chrono::seconds(sleep_time));
+        }
+    });
     rowset->set_version(Version(_version, _version));
     TabletTxnInfo txn_info;
     txn_info.rowset = rowset;
diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp
index ff01a898a31..25398e84346 100644
--- a/be/src/olap/base_tablet.cpp
+++ b/be/src/olap/base_tablet.cpp
@@ -20,6 +20,8 @@
 #include <fmt/format.h>
 #include <rapidjson/prettywriter.h>
 
+#include <random>
+
 #include "common/status.h"
 #include "olap/calc_delete_bitmap_executor.h"
 #include "olap/delete_bitmap_calculator.h"
@@ -661,6 +663,18 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset,
                 continue;
             }
 
+            
DBUG_EXECUTE_IF("BaseTablet::calc_segment_delete_bitmap.inject_err", {
+                auto p = dp->param("percent", 0.01);
+                std::mt19937 gen {std::random_device {}()};
+                std::bernoulli_distribution inject_fault {p};
+                if (inject_fault(gen)) {
+                    return Status::InternalError(
+                            "injection error in calc_segment_delete_bitmap, "
+                            "tablet_id={}, rowset_id={}",
+                            tablet_id(), rowset_id.to_string());
+                }
+            });
+
             RowsetSharedPtr rowset_find;
             auto st = lookup_row_key(key, rowset_schema.get(), true, 
specified_rowsets, &loc,
                                      dummy_version.first - 1, segment_caches, 
&rowset_find);
diff --git a/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out
new file mode 100644
index 00000000000..79839efff32
Binary files /dev/null and b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out differ
diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy
new file mode 100644
index 00000000000..3c6ce3e8294
--- /dev/null
+++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.junit.Assert
+import java.util.concurrent.TimeUnit
+import org.awaitility.Awaitility
+
+// test cases to ensure that inject points for mow correctness work as expected
+suite("test_cloud_mow_correctness_inject", "nonConcurrent") {
+    if (!isCloudMode()) {
+        return
+    }
+
+    GetDebugPoint().clearDebugPointsForAllFEs()
+    GetDebugPoint().clearDebugPointsForAllBEs()
+
+    def table1 = "test_cloud_mow_correctness_inject"
+    sql "DROP TABLE IF EXISTS ${table1} FORCE;"
+    sql """ CREATE TABLE IF NOT EXISTS ${table1} (
+            `k1` int NOT NULL,
+            `c1` int,
+            `c2` int
+            )UNIQUE KEY(k1)
+        DISTRIBUTED BY HASH(k1) BUCKETS 1
+        PROPERTIES (
+            "enable_mow_light_delete" = "false",
+            "enable_unique_key_merge_on_write" = "true",
+            "disable_auto_compaction" = "true",
+            "replication_num" = "1"); """
+
+    sql "insert into ${table1} values(1,1,1);"
+    sql "insert into ${table1} values(2,2,2);"
+    sql "insert into ${table1} values(3,3,3);"
+    sql "sync;"
+    qt_sql "select * from ${table1} order by k1;"
+
+    def customFeConfig = [
+        delete_bitmap_lock_expiration_seconds : 10,
+        calculate_delete_bitmap_task_timeout_seconds : 2,
+        mow_calculate_delete_bitmap_retry_times : 3
+    ]
+
+    setFeConfigTemporary(customFeConfig) {
+        try {
+            // 3 * 2s < 10s
+            
GetDebugPoint().enableDebugPointForAllBEs("CloudEngineCalcDeleteBitmapTask.handle.inject_sleep",
 [percent: "1.0", sleep: "10"])
+
+            test {
+                sql "insert into ${table1} values(4,4,4);"
+                exception "Failed to calculate delete bitmap. Timeout."
+            }
+
+            qt_sql "select * from ${table1} order by k1;"
+
+        } catch(Exception e) {
+            logger.info(e.getMessage())
+            throw e
+        } finally {
+            GetDebugPoint().clearDebugPointsForAllBEs()
+        }
+
+
+        try {
+            
GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::calc_segment_delete_bitmap.inject_err",
 [percent: "1.0"])
+
+            test {
+                sql "insert into ${table1} values(5,5,5);"
+                exception "injection error"
+            }
+
+            qt_sql "select * from ${table1} order by k1;"
+        } catch(Exception e) {
+            logger.info(e.getMessage())
+            throw e
+        } finally {
+            GetDebugPoint().clearDebugPointsForAllBEs()
+        }
+
+    }
+}
diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy
index 13abaf1ffca..4f091bef8ea 100644
--- a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy
+++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy
@@ -56,7 +56,7 @@ suite("test_cloud_mow_partial_update_retry", "nonConcurrent") {
             
GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block",
 [wait_token: "token1"])
 
             // the first load
-            t1 = Thread.start {
+            def t1 = Thread.start {
                 sql "set enable_unique_key_partial_update=true;"
                 sql "sync;"
                 sql "insert into ${table1}(k1,c1) values(1,999),(2,666);"
diff --git 
a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy
 
b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy
index 7b867088d1b..b380567bf54 100644
--- a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy
+++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy
@@ -74,7 +74,7 @@ suite("test_cloud_mow_stale_resp_load_compaction_conflict", "nonConcurrent") {
             
GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block",
 [wait_token: "token1"])
 
             // the first load
-            t1 = Thread.start {
+            def t1 = Thread.start {
                 sql "insert into ${table1} values(1,999,999),(2,888,888);"
             }
 
diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy
index 377ff70cf21..faafb6b8482 100644
--- a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy
+++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy
@@ -55,7 +55,7 @@ suite("test_cloud_mow_stale_resp_load_load_conflict", "nonConcurrent") {
             
GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block",
 [wait_token: "token1"])
 
             // the first load
-            t1 = Thread.start {
+            def t1 = Thread.start {
                 sql "insert into ${table1} values(1,999,999),(2,888,888);"
             }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to