This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new dbd5da56611 branch-3.1: [improve](cloud-mow)Add some metrics about 
delete bitmap update lock #47988 (#52123)
dbd5da56611 is described below

commit dbd5da56611143895089d6bb897ef718d815bebf
Author: meiyi <[email protected]>
AuthorDate: Mon Jun 23 19:16:35 2025 +0800

    branch-3.1: [improve](cloud-mow)Add some metrics about delete bitmap 
update lock #47988 (#52123)
    
    Cherry-pick from #47988
---
 be/src/cloud/cloud_base_compaction.cpp             | 11 ++++-
 be/src/cloud/cloud_cumulative_compaction.cpp       | 11 ++++-
 be/src/cloud/cloud_tablet.cpp                      |  3 +-
 be/src/cloud/cloud_tablet.h                        | 12 +++---
 .../routine_load/routine_load_task_executor.cpp    |  6 +++
 cloud/src/common/bvars.cpp                         | 12 ++++++
 cloud/src/common/bvars.h                           | 10 +++++
 cloud/src/meta-service/meta_service.cpp            | 12 ++++++
 cloud/src/meta-service/meta_service_job.cpp        | 32 +++++++++-----
 cloud/src/meta-service/meta_service_txn.cpp        |  9 ++++
 .../transaction/CloudGlobalTransactionMgr.java     | 50 ++++++++++++++++++++++
 .../java/org/apache/doris/metric/MetricRepo.java   | 23 ++++++++++
 .../doris/transaction/GlobalTransactionMgr.java    |  5 +++
 .../transaction/GlobalTransactionMgrIface.java     |  2 +
 14 files changed, 178 insertions(+), 20 deletions(-)

diff --git a/be/src/cloud/cloud_base_compaction.cpp 
b/be/src/cloud/cloud_base_compaction.cpp
index 2b9fd1c2e56..38dc72762e9 100644
--- a/be/src/cloud/cloud_base_compaction.cpp
+++ b/be/src/cloud/cloud_base_compaction.cpp
@@ -35,6 +35,8 @@ namespace doris {
 using namespace ErrorCode;
 
 bvar::Adder<uint64_t> base_output_size("base_compaction", "output_size");
+bvar::LatencyRecorder g_base_compaction_hold_delete_bitmap_lock_time_ms(
+        "base_compaction_hold_delete_bitmap_lock_time_ms");
 
 CloudBaseCompaction::CloudBaseCompaction(CloudStorageEngine& engine, 
CloudTabletSPtr tablet)
         : CloudCompactionMixin(engine, tablet,
@@ -349,13 +351,14 @@ Status CloudBaseCompaction::modify_rowsets() {
     
compaction_job->set_segment_size_output_rowsets(_output_rowset->data_disk_size());
 
     DeleteBitmapPtr output_rowset_delete_bitmap = nullptr;
+    int64_t get_delete_bitmap_lock_start_time = 0;
     if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
         _tablet->enable_unique_key_merge_on_write()) {
         int64_t initiator = this->initiator();
         RETURN_IF_ERROR(cloud_tablet()->calc_delete_bitmap_for_compaction(
                 _input_rowsets, _output_rowset, *_rowid_conversion, 
compaction_type(),
                 _stats.merged_rows, initiator, output_rowset_delete_bitmap,
-                _allow_delete_in_cumu_compaction));
+                _allow_delete_in_cumu_compaction, 
get_delete_bitmap_lock_start_time));
         LOG_INFO("update delete bitmap in CloudBaseCompaction, tablet_id={}, 
range=[{}-{}]",
                  _tablet->tablet_id(), _input_rowsets.front()->start_version(),
                  _input_rowsets.back()->end_version())
@@ -370,6 +373,12 @@ Status CloudBaseCompaction::modify_rowsets() {
 
     cloud::FinishTabletJobResponse resp;
     auto st = _engine.meta_mgr().commit_tablet_job(job, &resp);
+    if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
+        _tablet->enable_unique_key_merge_on_write()) {
+        int64_t hold_delete_bitmap_lock_time_ms =
+                (MonotonicMicros() - get_delete_bitmap_lock_start_time) / 1000;
+        g_base_compaction_hold_delete_bitmap_lock_time_ms << 
hold_delete_bitmap_lock_time_ms;
+    }
     if (!st.ok()) {
         if (resp.status().code() == cloud::TABLET_NOT_FOUND) {
             cloud_tablet()->clear_cache();
diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp 
b/be/src/cloud/cloud_cumulative_compaction.cpp
index 097bdc9bf4a..c0aefe33a27 100644
--- a/be/src/cloud/cloud_cumulative_compaction.cpp
+++ b/be/src/cloud/cloud_cumulative_compaction.cpp
@@ -37,6 +37,8 @@ namespace doris {
 using namespace ErrorCode;
 
 bvar::Adder<uint64_t> cumu_output_size("cumu_compaction", "output_size");
+bvar::LatencyRecorder g_cu_compaction_hold_delete_bitmap_lock_time_ms(
+        "cu_compaction_hold_delete_bitmap_lock_time_ms");
 
 CloudCumulativeCompaction::CloudCumulativeCompaction(CloudStorageEngine& 
engine,
                                                      CloudTabletSPtr tablet)
@@ -289,12 +291,13 @@ Status CloudCumulativeCompaction::modify_rowsets() {
 
     DeleteBitmapPtr output_rowset_delete_bitmap = nullptr;
     int64_t initiator = this->initiator();
+    int64_t get_delete_bitmap_lock_start_time = 0;
     if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
         _tablet->enable_unique_key_merge_on_write()) {
         RETURN_IF_ERROR(cloud_tablet()->calc_delete_bitmap_for_compaction(
                 _input_rowsets, _output_rowset, *_rowid_conversion, 
compaction_type(),
                 _stats.merged_rows, initiator, output_rowset_delete_bitmap,
-                _allow_delete_in_cumu_compaction));
+                _allow_delete_in_cumu_compaction, 
get_delete_bitmap_lock_start_time));
         LOG_INFO("update delete bitmap in CloudCumulativeCompaction, 
tablet_id={}, range=[{}-{}]",
                  _tablet->tablet_id(), _input_rowsets.front()->start_version(),
                  _input_rowsets.back()->end_version())
@@ -317,6 +320,12 @@ Status CloudCumulativeCompaction::modify_rowsets() {
     });
     cloud::FinishTabletJobResponse resp;
     auto st = _engine.meta_mgr().commit_tablet_job(job, &resp);
+    if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
+        _tablet->enable_unique_key_merge_on_write()) {
+        int64_t hold_delete_bitmap_lock_time_ms =
+                (MonotonicMicros() - get_delete_bitmap_lock_start_time) / 1000;
+        g_cu_compaction_hold_delete_bitmap_lock_time_ms << 
hold_delete_bitmap_lock_time_ms;
+    }
     if (resp.has_alter_version()) {
         
(static_cast<CloudTablet*>(_tablet.get()))->set_alter_version(resp.alter_version());
     }
diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index c044b8361b7..75852fbe92c 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -927,7 +927,7 @@ Status CloudTablet::calc_delete_bitmap_for_compaction(
         const std::vector<RowsetSharedPtr>& input_rowsets, const 
RowsetSharedPtr& output_rowset,
         const RowIdConversion& rowid_conversion, ReaderType compaction_type, 
int64_t merged_rows,
         int64_t initiator, DeleteBitmapPtr& output_rowset_delete_bitmap,
-        bool allow_delete_in_cumu_compaction) {
+        bool allow_delete_in_cumu_compaction, int64_t& 
get_delete_bitmap_lock_start_time) {
     output_rowset_delete_bitmap = std::make_shared<DeleteBitmap>(tablet_id());
     std::unique_ptr<RowLocationSet> missed_rows;
     if ((config::enable_missing_rows_correctness_check ||
@@ -980,6 +980,7 @@ Status CloudTablet::calc_delete_bitmap_for_compaction(
     RETURN_IF_ERROR(_engine.meta_mgr().get_delete_bitmap_update_lock(
             *this, COMPACTION_DELETE_BITMAP_LOCK_ID, initiator));
     int64_t t2 = MonotonicMicros();
+    get_delete_bitmap_lock_start_time = t2;
     RETURN_IF_ERROR(_engine.meta_mgr().sync_tablet_rowsets(this));
     int64_t t3 = MonotonicMicros();
 
diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h
index ee3fdc35c6a..8052c39f3a7 100644
--- a/be/src/cloud/cloud_tablet.h
+++ b/be/src/cloud/cloud_tablet.h
@@ -220,13 +220,11 @@ public:
     Status save_delete_bitmap_to_ms(int64_t cur_version, int64_t txn_id,
                                     DeleteBitmapPtr delete_bitmap, int64_t 
next_visible_version);
 
-    Status calc_delete_bitmap_for_compaction(const 
std::vector<RowsetSharedPtr>& input_rowsets,
-                                             const RowsetSharedPtr& 
output_rowset,
-                                             const RowIdConversion& 
rowid_conversion,
-                                             ReaderType compaction_type, 
int64_t merged_rows,
-                                             int64_t initiator,
-                                             DeleteBitmapPtr& 
output_rowset_delete_bitmap,
-                                             bool 
allow_delete_in_cumu_compaction);
+    Status calc_delete_bitmap_for_compaction(
+            const std::vector<RowsetSharedPtr>& input_rowsets, const 
RowsetSharedPtr& output_rowset,
+            const RowIdConversion& rowid_conversion, ReaderType 
compaction_type,
+            int64_t merged_rows, int64_t initiator, DeleteBitmapPtr& 
output_rowset_delete_bitmap,
+            bool allow_delete_in_cumu_compaction, int64_t& 
get_delete_bitmap_lock_start_time);
 
     // Find the missed versions until the spec_version.
     //
diff --git a/be/src/runtime/routine_load/routine_load_task_executor.cpp 
b/be/src/runtime/routine_load/routine_load_task_executor.cpp
index 8db18d429e7..2cd5f16929c 100644
--- a/be/src/runtime/routine_load/routine_load_task_executor.cpp
+++ b/be/src/runtime/routine_load/routine_load_task_executor.cpp
@@ -62,6 +62,9 @@ using namespace ErrorCode;
 
 DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(routine_load_task_count, 
MetricUnit::NOUNIT);
 
+bvar::LatencyRecorder 
g_routine_load_commit_and_publish_latency_ms("routine_load",
+                                                                   
"commit_and_publish_ms");
+
 RoutineLoadTaskExecutor::RoutineLoadTaskExecutor(ExecEnv* exec_env) : 
_exec_env(exec_env) {
     REGISTER_HOOK_METRIC(routine_load_task_count, [this]() {
         // std::lock_guard<std::mutex> l(_lock);
@@ -457,7 +460,10 @@ void 
RoutineLoadTaskExecutor::exec_task(std::shared_ptr<StreamLoadContext> ctx,
     consumer_pool->return_consumers(consumer_grp.get());
 
     // commit txn
+    int64_t commit_and_publish_start_time = MonotonicNanos();
     HANDLE_ERROR(_exec_env->stream_load_executor()->commit_txn(ctx.get()), 
"commit failed");
+    g_routine_load_commit_and_publish_latency_ms
+            << (MonotonicNanos() - commit_and_publish_start_time) / 1000000;
     // commit kafka offset
     switch (ctx->load_src_type) {
     case TLoadSourceType::KAFKA: {
diff --git a/cloud/src/common/bvars.cpp b/cloud/src/common/bvars.cpp
index fe887760f7f..6eb4c31a670 100644
--- a/cloud/src/common/bvars.cpp
+++ b/cloud/src/common/bvars.cpp
@@ -117,6 +117,18 @@ bvar::Adder<int64_t> g_bvar_txn_kv_commit_error_counter;
 bvar::Window<bvar::Adder<int64_t> > 
g_bvar_txn_kv_commit_error_counter_minute("txn_kv", "commit_error", 
&g_bvar_txn_kv_commit_error_counter, 60);
 bvar::Adder<int64_t> g_bvar_txn_kv_commit_conflict_counter;
 bvar::Window<bvar::Adder<int64_t> > 
g_bvar_txn_kv_commit_conflict_counter_minute("txn_kv", "commit_conflict", 
&g_bvar_txn_kv_commit_conflict_counter, 60);
+bvar::Adder<int64_t> g_bvar_delete_bitmap_lock_txn_put_conflict_counter;
+bvar::Window<bvar::Adder<int64_t> > 
g_bvar_delete_bitmap_lock_txn_put_conflict_counter_minute("delete_bitmap_lock", 
"txn_put_conflict", &g_bvar_delete_bitmap_lock_txn_put_conflict_counter, 60);
+bvar::Adder<int64_t> 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_fail_counter;
+bvar::Window<bvar::Adder<int64_t> > 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_fail_counter_minute("delete_bitmap_lock",
 "txn_remove_conflict_by_fail", 
&g_bvar_delete_bitmap_lock_txn_remove_conflict_by_fail_counter, 60);
+bvar::Adder<int64_t> 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter;
+bvar::Window<bvar::Adder<int64_t> > 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter_minute("delete_bitmap_lock",
 "txn_remove_conflict_by_load", 
&g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter, 60);
+bvar::Adder<int64_t> 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_commit_counter;
+bvar::Window<bvar::Adder<int64_t> > 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_commit_counter_minute("delete_bitmap_lock",
 "txn_remove_conflict_by_compaction_commit", 
&g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_commit_counter, 
60);
+bvar::Adder<int64_t> 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_lease_counter;
+bvar::Window<bvar::Adder<int64_t> > 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_lease_counter_minute("delete_bitmap_lock",
 "txn_remove_conflict_by_compaction_lease", 
&g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_lease_counter, 60);
+bvar::Adder<int64_t> 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_abort_counter;
+bvar::Window<bvar::Adder<int64_t> > 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_abort_counter_minute("delete_bitmap_lock",
 "txn_remove_conflict_by_compaction_abort", 
&g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_abort_counter, 60);
 
 // fdb's bvars
 const int64_t BVAR_FDB_INVALID_VALUE = -99999999L;
diff --git a/cloud/src/common/bvars.h b/cloud/src/common/bvars.h
index 7f616615394..dce05dc3c57 100644
--- a/cloud/src/common/bvars.h
+++ b/cloud/src/common/bvars.h
@@ -189,6 +189,16 @@ extern bvar::Adder<int64_t> 
g_bvar_txn_kv_commit_error_counter;
 extern bvar::Adder<int64_t> g_bvar_txn_kv_commit_conflict_counter;
 extern bvar::Adder<int64_t> g_bvar_txn_kv_get_count_normalized;
 
+extern bvar::Adder<int64_t> g_bvar_delete_bitmap_lock_txn_put_conflict_counter;
+extern bvar::Adder<int64_t> 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_fail_counter;
+extern bvar::Adder<int64_t> 
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter;
+extern bvar::Adder<int64_t>
+        
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_commit_counter;
+extern bvar::Adder<int64_t>
+        
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_lease_counter;
+extern bvar::Adder<int64_t>
+        
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_abort_counter;
+
 extern const int64_t BVAR_FDB_INVALID_VALUE;
 extern bvar::Status<int64_t> g_bvar_fdb_client_count;
 extern bvar::Status<int64_t> g_bvar_fdb_configuration_coordinators_count;
diff --git a/cloud/src/meta-service/meta_service.cpp 
b/cloud/src/meta-service/meta_service.cpp
index f5b904a6f66..2de89f26786 100644
--- a/cloud/src/meta-service/meta_service.cpp
+++ b/cloud/src/meta-service/meta_service.cpp
@@ -2312,6 +2312,9 @@ void 
MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
     total_txn_size += txn->approximate_bytes();
     total_txn_count++;
     if (err != TxnErrorCode::TXN_OK) {
+        if (err == TxnErrorCode::TXN_CONFLICT) {
+            g_bvar_delete_bitmap_lock_txn_put_conflict_counter << 1;
+        }
         code = cast_as<ErrCategory::COMMIT>(err);
         ss << "failed to update delete bitmap, err=" << err << " tablet_id=" 
<< tablet_id
            << " lock_id=" << request->lock_id() << " initiator=" << 
request->initiator()
@@ -2790,12 +2793,18 @@ void MetaServiceImpl::get_delete_bitmap_update_lock_v2(
                    request->lock_id() == COMPACTION_DELETE_BITMAP_LOCK_ID &&
                    config::delete_bitmap_enable_retry_txn_conflict && 
first_retry) {
             // if err is TXN_CONFLICT, and the lock id is -1, do a fast retry
+            if (err == TxnErrorCode::TXN_CONFLICT) {
+                g_bvar_delete_bitmap_lock_txn_put_conflict_counter << 1;
+            }
             LOG(INFO) << "fast retry to get_delete_bitmap_update_lock, 
tablet_id="
                       << request->table_id() << " lock_id=" << 
request->lock_id()
                       << ", initiator=" << request->initiator() << ", err=" << 
err;
             first_retry = false;
             continue;
         } else {
+            if (err == TxnErrorCode::TXN_CONFLICT) {
+                g_bvar_delete_bitmap_lock_txn_put_conflict_counter << 1;
+            }
             code = cast_as<ErrCategory::COMMIT>(err);
             ss << "failed to get_delete_bitmap_update_lock, lock_id=" << 
request->lock_id()
                << ", initiator=" << request->initiator() << ", err=" << err;
@@ -3176,6 +3185,9 @@ void MetaServiceImpl::remove_delete_bitmap_update_lock_v2(
     }
     err = txn->commit();
     if (err != TxnErrorCode::TXN_OK) {
+        if (err == TxnErrorCode::TXN_CONFLICT) {
+            g_bvar_delete_bitmap_lock_txn_remove_conflict_by_fail_counter << 1;
+        }
         code = cast_as<ErrCategory::COMMIT>(err);
         ss << "failed to remove delete bitmap tablet lock , err=" << err;
         msg = ss.str();
diff --git a/cloud/src/meta-service/meta_service_job.cpp 
b/cloud/src/meta-service/meta_service_job.cpp
index f4897b6ed44..b00d834e322 100644
--- a/cloud/src/meta-service/meta_service_job.cpp
+++ b/cloud/src/meta-service/meta_service_job.cpp
@@ -25,6 +25,7 @@
 #include <cstddef>
 #include <sstream>
 
+#include "common/bvars.h"
 #include "common/config.h"
 #include "common/logging.h"
 #include "common/util.h"
@@ -1561,18 +1562,29 @@ void 
MetaServiceImpl::finish_tablet_job(::google::protobuf::RpcController* contr
     recorded_job.ParseFromString(job_val);
     VLOG_DEBUG << "get tablet job, tablet_id=" << tablet_id
                << " job=" << proto_to_json(recorded_job);
+    FinishTabletJobRequest_Action action = request->action();
 
-    std::unique_ptr<int, std::function<void(int*)>> defer_commit(
-            (int*)0x01, [&ss, &txn, &code, &msg, &need_commit](int*) {
-                if (!need_commit) return;
-                TxnErrorCode err = txn->commit();
-                if (err != TxnErrorCode::TXN_OK) {
-                    code = cast_as<ErrCategory::COMMIT>(err);
-                    ss << "failed to commit job kv, err=" << err;
-                    msg = ss.str();
-                    return;
+    std::unique_ptr<int, std::function<void(int*)>> defer_commit((int*)0x01, 
[&ss, &txn, &code,
+                                                                              
&msg, &need_commit,
+                                                                              
&action](int*) {
+        if (!need_commit) return;
+        TxnErrorCode err = txn->commit();
+        if (err != TxnErrorCode::TXN_OK) {
+            if (err == TxnErrorCode::TXN_CONFLICT) {
+                if (action == FinishTabletJobRequest::COMMIT) {
+                    
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_commit_counter << 1;
+                } else if (action == FinishTabletJobRequest::LEASE) {
+                    
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_lease_counter << 1;
+                } else if (action == FinishTabletJobRequest::ABORT) {
+                    
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_abort_counter << 1;
                 }
-            });
+            }
+            code = cast_as<ErrCategory::COMMIT>(err);
+            ss << "failed to commit job kv, err=" << err;
+            msg = ss.str();
+            return;
+        }
+    });
 
     // Process compaction commit
     if (!request->job().compaction().empty()) {
diff --git a/cloud/src/meta-service/meta_service_txn.cpp 
b/cloud/src/meta-service/meta_service_txn.cpp
index 490c7bb616b..7f009004003 100644
--- a/cloud/src/meta-service/meta_service_txn.cpp
+++ b/cloud/src/meta-service/meta_service_txn.cpp
@@ -1379,6 +1379,9 @@ void commit_txn_immediately(
         // Finally we are done...
         err = txn->commit();
         if (err != TxnErrorCode::TXN_OK) {
+            if (err == TxnErrorCode::TXN_CONFLICT) {
+                g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter 
<< 1;
+            }
             code = cast_as<ErrCategory::COMMIT>(err);
             ss << "failed to commit kv txn, txn_id=" << txn_id << " err=" << 
err;
             msg = ss.str();
@@ -1893,6 +1896,9 @@ void commit_txn_eventually(
 
         err = txn->commit();
         if (err != TxnErrorCode::TXN_OK) {
+            if (err == TxnErrorCode::TXN_CONFLICT) {
+                g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter 
<< 1;
+            }
             code = cast_as<ErrCategory::COMMIT>(err);
             ss << "failed to commit kv txn, txn_id=" << txn_id << " err=" << 
err;
             msg = ss.str();
@@ -2482,6 +2488,9 @@ void commit_txn_with_sub_txn(const CommitTxnRequest* 
request, CommitTxnResponse*
     // Finally we are done...
     err = txn->commit();
     if (err != TxnErrorCode::TXN_OK) {
+        if (err == TxnErrorCode::TXN_CONFLICT) {
+            g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter << 1;
+        }
         if (err == TxnErrorCode::TXN_VALUE_TOO_LARGE || err == 
TxnErrorCode::TXN_BYTES_TOO_LARGE) {
             size_t max_size = 0, max_num_segments = 0,
                    min_num_segments = std::numeric_limits<size_t>::max(), 
avg_num_segments = 0;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
index 40e5bb834df..520539d0704 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
@@ -156,9 +156,50 @@ public class CloudGlobalTransactionMgr implements 
GlobalTransactionMgrIface {
     private static final Logger LOG = 
LogManager.getLogger(CloudGlobalTransactionMgr.class);
     private static final String NOT_SUPPORTED_MSG = "Not supported in cloud 
mode";
 
+    class CommitCostTimeStatistic {
+        long waitCommitLockCostTimeMs = 0;
+        long waitDeleteBitmapLockCostTimeMs = 0;
+        long calculateDeleteBitmapCostTimeMs = 0;
+
+        long commitToMsCostTimeMs = 0;
+
+        public long getCommitToMsCostTimeMs() {
+            return commitToMsCostTimeMs;
+        }
+
+        public void setCommitToMsCostTimeMs(long commitToMsCostTimeMs) {
+            this.commitToMsCostTimeMs = commitToMsCostTimeMs;
+        }
+
+        public long getCalculateDeleteBitmapCostTimeMs() {
+            return calculateDeleteBitmapCostTimeMs;
+        }
+
+        public void setCalculateDeleteBitmapCostTimeMs(long 
calculateDeleteBitmapCostTimeMs) {
+            this.calculateDeleteBitmapCostTimeMs = 
calculateDeleteBitmapCostTimeMs;
+        }
+
+        public long getWaitCommitLockCostTimeMs() {
+            return waitCommitLockCostTimeMs;
+        }
+
+        public void setWaitCommitLockCostTimeMs(long waitCommitLockCostTimeMs) 
{
+            this.waitCommitLockCostTimeMs = waitCommitLockCostTimeMs;
+        }
+
+        public long getWaitDeleteBitmapLockCostTimeMs() {
+            return waitDeleteBitmapLockCostTimeMs;
+        }
+
+        public void setWaitDeleteBitmapLockCostTimeMs(long 
waitDeleteBitmapLockCostTimeMs) {
+            this.waitDeleteBitmapLockCostTimeMs = 
waitDeleteBitmapLockCostTimeMs;
+        }
+    }
+
     private TxnStateCallbackFactory callbackFactory;
     private final Map<Long, Long> subTxnIdToTxnId = new ConcurrentHashMap<>();
     private Map<Long, AtomicInteger> waitToCommitTxnCountMap = new 
ConcurrentHashMap<>();
+    private Map<Long, CommitCostTimeStatistic> commitCostTimeStatisticMap = 
new ConcurrentHashMap<>();
 
     // dbId -> tableId -> txnId
     private Map<Long, Map<Long, Long>> lastTxnIdMap = Maps.newConcurrentMap();
@@ -2140,6 +2181,15 @@ public class CloudGlobalTransactionMgr implements 
GlobalTransactionMgrIface {
         }
     }
 
+    @Override
+    public int getQueueLength() {
+        int count = 0;
+        for (Map.Entry<Long, AtomicInteger> entry : 
waitToCommitTxnCountMap.entrySet()) {
+            count += entry.getValue().get();
+        }
+        return count;
+    }
+
     private void decreaseWaitingLockCount(List<Table> tableList) {
         for (int i = 0; i < tableList.size(); i++) {
             long tableId = tableList.get(i).getId();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java 
b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
index 3e216739720..4e3257c3822 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
@@ -161,6 +161,11 @@ public final class MetricRepo {
 
     public static Histogram HISTO_COMMIT_AND_PUBLISH_LATENCY;
 
+    public static Histogram HISTO_GET_DELETE_BITMAP_UPDATE_LOCK_LATENCY;
+    public static Histogram HISTO_GET_COMMIT_LOCK_LATENCY;
+    public static Histogram HISTO_CALCULATE_DELETE_BITMAP_LATENCY;
+    public static Histogram HISTO_COMMIT_TO_MS_LATENCY;
+
     // Catlaog/Database/Table num
     public static GaugeMetric<Integer> GAUGE_CATALOG_NUM;
     public static GaugeMetric<Integer> GAUGE_INTERNAL_DATABASE_NUM;
@@ -589,6 +594,24 @@ public final class MetricRepo {
         HISTO_COMMIT_AND_PUBLISH_LATENCY = METRIC_REGISTER.histogram(
                 MetricRegistry.name("txn_commit_and_publish", "latency", 
"ms"));
 
+        GaugeMetric<Integer> commitQueueLength = new 
GaugeMetric<Integer>("commit_queue_length",
+                MetricUnit.NOUNIT, "commit queue length") {
+            @Override
+            public Integer getValue() {
+                return 
Env.getCurrentEnv().getGlobalTransactionMgr().getQueueLength();
+            }
+        };
+        DORIS_METRIC_REGISTER.addMetrics(commitQueueLength);
+
+        HISTO_GET_DELETE_BITMAP_UPDATE_LOCK_LATENCY = 
METRIC_REGISTER.histogram(
+                MetricRegistry.name("get_delete_bitmap_update_lock", 
"latency", "ms"));
+        HISTO_GET_COMMIT_LOCK_LATENCY = METRIC_REGISTER.histogram(
+                MetricRegistry.name("get_commit_lock", "latency", "ms"));
+        HISTO_CALCULATE_DELETE_BITMAP_LATENCY = METRIC_REGISTER.histogram(
+                MetricRegistry.name("calculate_delete_bitmap", "latency", 
"ms"));
+        HISTO_COMMIT_TO_MS_LATENCY = METRIC_REGISTER.histogram(
+                MetricRegistry.name("commit_to_ms", "latency", "ms"));
+
         GAUGE_CATALOG_NUM = new GaugeMetric<Integer>("catalog_num",
                 MetricUnit.NOUNIT, "total catalog num") {
             @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java
 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java
index 6552c65d412..64a91459fbe 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java
@@ -999,4 +999,9 @@ public class GlobalTransactionMgr implements 
GlobalTransactionMgrIface {
             LOG.warn("remove sub transaction failed. db " + dbId, e);
         }
     }
+
+    @Override
+    public int getQueueLength() {
+        return 0;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgrIface.java
 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgrIface.java
index 38c8cdd5a6d..c23787eca4d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgrIface.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgrIface.java
@@ -206,4 +206,6 @@ public interface GlobalTransactionMgrIface extends Writable 
{
 
     public List<TransactionState> getUnFinishedPreviousLoad(long 
endTransactionId,
                 long dbId, List<Long> tableIdList) throws UserException;
+
+    public int getQueueLength();
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to