This is an automated email from the ASF dual-hosted git repository. zhangchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 840503e8d58 [improve](cloud-mow) reduce ms update_delete_bitmap kv confict (#47375) 840503e8d58 is described below commit 840503e8d584b53635b92ead247ac02b3772d48b Author: meiyi <me...@selectdb.com> AuthorDate: Thu Jun 5 14:51:56 2025 +0800 [improve](cloud-mow) reduce ms update_delete_bitmap kv confict (#47375) Problem Summary: 1. `update_delete_bitmap` may be split into several transactions so that the delete bitmap size does not exceed the FDB transaction limit. 2. Multiple compaction jobs change the initiators of the lock_info, which causes txn_conflict in `update_delete_bitmap`. 3. For an update split across multiple transactions, the txn_conflict error is more likely to happen; even after some retries, `update_delete_bitmap` will still fail. 4. The root cause is that multiple compactions should not conflict with each other; PR 48024 solves this. 5. However, branch-3.0 does not contain PR 48024, so change the lock_id check to a snapshot read to avoid txn_conflict. 
if lock_id is changed, the final `commit_txn` or `commit_job` can handle it --- cloud/src/meta-service/meta_service.cpp | 14 +++++++++----- cloud/src/meta-service/meta_service.h | 2 ++ cloud/src/meta-service/meta_service_job.cpp | 3 --- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 36b32063a96..45e64f475f6 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -1749,10 +1749,12 @@ void MetaServiceImpl::get_tablet_stats(::google::protobuf::RpcController* contro static bool check_delete_bitmap_lock(MetaServiceCode& code, std::string& msg, std::stringstream& ss, std::unique_ptr<Transaction>& txn, int64_t table_id, int64_t lock_id, int64_t lock_initiator, std::string& lock_key, - DeleteBitmapUpdateLockPB& lock_info) { + DeleteBitmapUpdateLockPB& lock_info, + bool snapshot_read = false) { std::string lock_val; - LOG(INFO) << "check_delete_bitmap_lock, table_id=" << table_id << " key=" << hex(lock_key); - auto err = txn->get(lock_key, &lock_val); + LOG(INFO) << "check_delete_bitmap_lock, table_id=" << table_id << " key=" << hex(lock_key) + << ", lock_id=" << lock_id << ", snapshot_read=" << snapshot_read; + auto err = txn->get(lock_key, &lock_val, snapshot_read); TEST_SYNC_POINT_CALLBACK("check_delete_bitmap_lock.inject_get_lock_key_err", &err); if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { msg = "lock id key not found"; @@ -1963,12 +1965,13 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont } bool unlock = request->has_unlock() ? request->unlock() : false; + bool snapshot_read = request->lock_id() == COMPACTION_DELETE_BITMAP_LOCK_ID; if (!unlock) { // 1. 
Check whether the lock expires std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); DeleteBitmapUpdateLockPB lock_info; if (!check_delete_bitmap_lock(code, msg, ss, txn, table_id, request->lock_id(), - request->initiator(), lock_key, lock_info)) { + request->initiator(), lock_key, lock_info, snapshot_read)) { LOG(WARNING) << "failed to check delete bitmap lock, table_id=" << table_id << " request lock_id=" << request->lock_id() << " request initiator=" << request->initiator() << " msg " << msg; @@ -2079,7 +2082,8 @@ void MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont meta_delete_bitmap_update_lock_key({instance_id, table_id, -1}); DeleteBitmapUpdateLockPB lock_info; if (!check_delete_bitmap_lock(code, msg, ss, txn, table_id, request->lock_id(), - request->initiator(), lock_key, lock_info)) { + request->initiator(), lock_key, lock_info, + snapshot_read)) { LOG(WARNING) << "failed to check delete bitmap lock, table_id=" << table_id << " request lock_id=" << request->lock_id() << " request initiator=" << request->initiator() << " msg " << msg; diff --git a/cloud/src/meta-service/meta_service.h b/cloud/src/meta-service/meta_service.h index 6df09bd2c20..57f88d51dfe 100644 --- a/cloud/src/meta-service/meta_service.h +++ b/cloud/src/meta-service/meta_service.h @@ -39,6 +39,8 @@ namespace doris::cloud { class Transaction; constexpr std::string_view BUILT_IN_STORAGE_VAULT_NAME = "built_in_storage_vault"; +static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID = -1; +static constexpr int SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID = -2; void internal_get_rowset(Transaction* txn, int64_t start, int64_t end, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code, diff --git a/cloud/src/meta-service/meta_service_job.cpp b/cloud/src/meta-service/meta_service_job.cpp index 29f1c9993fd..3dd89afbb61 100644 --- a/cloud/src/meta-service/meta_service_job.cpp +++ b/cloud/src/meta-service/meta_service_job.cpp @@ 
-46,9 +46,6 @@ static inline constexpr size_t get_file_name_offset(const T (&s)[S], size_t i = namespace doris::cloud { -static constexpr int COMPACTION_DELETE_BITMAP_LOCK_ID = -1; -static constexpr int SCHEMA_CHANGE_DELETE_BITMAP_LOCK_ID = -2; - // check compaction input_versions are valid during schema change. // If the schema change job doesnt have alter version, it dont need to check // because the schema change job is come from old version BE. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org