This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new eff1564cbc6 branch-3.0: [Opt](cloud) Add some injection points for mow (#47712) (#47849) eff1564cbc6 is described below commit eff1564cbc6867375e55f59ef10eb956200e4c5d Author: bobhan1 <bao...@selectdb.com> AuthorDate: Tue Feb 18 16:28:26 2025 +0800 branch-3.0: [Opt](cloud) Add some injection points for mow (#47712) (#47849) pick https://github.com/apache/doris/pull/47712 --- .../cloud/cloud_engine_calc_delete_bitmap_task.cpp | 14 +++ be/src/olap/base_tablet.cpp | 14 +++ .../cloud/test_cloud_mow_correctness_inject.out | Bin 0 -> 185 bytes .../cloud/test_cloud_mow_correctness_inject.groovy | 94 +++++++++++++++++++++ .../test_cloud_mow_partial_update_retry.groovy | 2 +- ..._mow_stale_resp_load_compaction_conflict.groovy | 2 +- ..._cloud_mow_stale_resp_load_load_conflict.groovy | 2 +- 7 files changed, 125 insertions(+), 3 deletions(-) diff --git a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp index 9de00993117..e85b160cf2f 100644 --- a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp +++ b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp @@ -20,6 +20,8 @@ #include <fmt/format.h> #include <memory> +#include <random> +#include <thread> #include "cloud/cloud_meta_mgr.h" #include "cloud/cloud_tablet.h" @@ -208,6 +210,18 @@ Status CloudTabletCalcDeleteBitmapTask::handle() const { } int64_t t3 = MonotonicMicros(); + DBUG_EXECUTE_IF("CloudEngineCalcDeleteBitmapTask.handle.inject_sleep", { + auto p = dp->param("percent", 0.01); + // 100s > Config.calculate_delete_bitmap_task_timeout_seconds = 60s + auto sleep_time = dp->param("sleep", 100); + std::mt19937 gen {std::random_device {}()}; + std::bernoulli_distribution inject_fault {p}; + if (inject_fault(gen)) { + LOG_INFO("injection sleep for {} seconds, txn={}, tablet_id={}", sleep_time, + _transaction_id, _tablet_id); + std::this_thread::sleep_for(std::chrono::seconds(sleep_time)); + } + }); rowset->set_version(Version(_version, _version)); TabletTxnInfo txn_info; txn_info.rowset = rowset; diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index ff01a898a31..25398e84346 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -20,6 +20,8 @@ #include <fmt/format.h> #include <rapidjson/prettywriter.h> +#include <random> + #include "common/status.h" #include "olap/calc_delete_bitmap_executor.h" #include "olap/delete_bitmap_calculator.h" @@ -661,6 +663,18 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset, continue; } + DBUG_EXECUTE_IF("BaseTablet::calc_segment_delete_bitmap.inject_err", { + auto p = dp->param("percent", 0.01); + std::mt19937 gen {std::random_device {}()}; + std::bernoulli_distribution inject_fault {p}; + if (inject_fault(gen)) { + return Status::InternalError( + "injection error in calc_segment_delete_bitmap, " + "tablet_id={}, rowset_id={}", + tablet_id(), rowset_id.to_string()); + } + }); + RowsetSharedPtr rowset_find; auto st = lookup_row_key(key, rowset_schema.get(), true, specified_rowsets, &loc, dummy_version.first - 1, segment_caches, &rowset_find); diff --git a/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out new file mode 100644 index 00000000000..79839efff32 Binary files /dev/null and b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.out differ diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy new file mode 100644 index 00000000000..3c6ce3e8294 --- /dev/null +++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_correctness_inject.groovy @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.junit.Assert +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility + +// test cases to ensure that inject points for mow correctness work as expected +suite("test_cloud_mow_correctness_inject", "nonConcurrent") { + if (!isCloudMode()) { + return + } + + GetDebugPoint().clearDebugPointsForAllFEs() + GetDebugPoint().clearDebugPointsForAllBEs() + + def table1 = "test_cloud_mow_correctness_inject" + sql "DROP TABLE IF EXISTS ${table1} FORCE;" + sql """ CREATE TABLE IF NOT EXISTS ${table1} ( + `k1` int NOT NULL, + `c1` int, + `c2` int + )UNIQUE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 1 + PROPERTIES ( + "enable_mow_light_delete" = "false", + "enable_unique_key_merge_on_write" = "true", + "disable_auto_compaction" = "true", + "replication_num" = "1"); """ + + sql "insert into ${table1} values(1,1,1);" + sql "insert into ${table1} values(2,2,2);" + sql "insert into ${table1} values(3,3,3);" + sql "sync;" + qt_sql "select * from ${table1} order by k1;" + + def customFeConfig = [ + delete_bitmap_lock_expiration_seconds : 10, + calculate_delete_bitmap_task_timeout_seconds : 2, + mow_calculate_delete_bitmap_retry_times : 3 + ] + + setFeConfigTemporary(customFeConfig) { + try { + // 3 * 2s < 10s + GetDebugPoint().enableDebugPointForAllBEs("CloudEngineCalcDeleteBitmapTask.handle.inject_sleep", [percent: "1.0", sleep: "10"]) + + test { + sql "insert into ${table1} values(4,4,4);" + exception "Failed to calculate delete bitmap. Timeout." + } + + qt_sql "select * from ${table1} order by k1;" + + } catch(Exception e) { + logger.info(e.getMessage()) + throw e + } finally { + GetDebugPoint().clearDebugPointsForAllBEs() + } + + + try { + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::calc_segment_delete_bitmap.inject_err", [percent: "1.0"]) + + test { + sql "insert into ${table1} values(5,5,5);" + exception "injection error" + } + + qt_sql "select * from ${table1} order by k1;" + } catch(Exception e) { + logger.info(e.getMessage()) + throw e + } finally { + GetDebugPoint().clearDebugPointsForAllBEs() + } + + } +} diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy index 13abaf1ffca..4f091bef8ea 100644 --- a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy +++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_partial_update_retry.groovy @@ -56,7 +56,7 @@ suite("test_cloud_mow_partial_update_retry", "nonConcurrent") { GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block", [wait_token: "token1"]) // the first load - t1 = Thread.start { + def t1 = Thread.start { sql "set enable_unique_key_partial_update=true;" sql "sync;" sql "insert into ${table1}(k1,c1) values(1,999),(2,666);" diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy index 7b867088d1b..b380567bf54 100644 --- a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy +++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy @@ -74,7 +74,7 @@ suite("test_cloud_mow_stale_resp_load_compaction_conflict", "nonConcurrent") { GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block", [wait_token: "token1"]) // the first load - t1 = Thread.start { + def t1 = Thread.start { sql "insert into ${table1} values(1,999,999),(2,888,888);" } diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy index 377ff70cf21..faafb6b8482 100644 --- a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy +++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy @@ -55,7 +55,7 @@ suite("test_cloud_mow_stale_resp_load_load_conflict", "nonConcurrent") { GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block", [wait_token: "token1"]) // the first load - t1 = Thread.start { + def t1 = Thread.start { sql "insert into ${table1} values(1,999,999),(2,888,888);" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org