This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new dbd5da56611 branch-3.1: [improve](cloud-mow)Add some metrics about
delete bitmap update lock #47988 (#52123)
dbd5da56611 is described below
commit dbd5da56611143895089d6bb897ef718d815bebf
Author: meiyi <[email protected]>
AuthorDate: Mon Jun 23 19:16:35 2025 +0800
branch-3.1: [improve](cloud-mow)Add some metrics about delete bitmap
update lock #47988 (#52123)
Cherry-pick from #47988
---
be/src/cloud/cloud_base_compaction.cpp | 11 ++++-
be/src/cloud/cloud_cumulative_compaction.cpp | 11 ++++-
be/src/cloud/cloud_tablet.cpp | 3 +-
be/src/cloud/cloud_tablet.h | 12 +++---
.../routine_load/routine_load_task_executor.cpp | 6 +++
cloud/src/common/bvars.cpp | 12 ++++++
cloud/src/common/bvars.h | 10 +++++
cloud/src/meta-service/meta_service.cpp | 12 ++++++
cloud/src/meta-service/meta_service_job.cpp | 32 +++++++++-----
cloud/src/meta-service/meta_service_txn.cpp | 9 ++++
.../transaction/CloudGlobalTransactionMgr.java | 50 ++++++++++++++++++++++
.../java/org/apache/doris/metric/MetricRepo.java | 23 ++++++++++
.../doris/transaction/GlobalTransactionMgr.java | 5 +++
.../transaction/GlobalTransactionMgrIface.java | 2 +
14 files changed, 178 insertions(+), 20 deletions(-)
diff --git a/be/src/cloud/cloud_base_compaction.cpp
b/be/src/cloud/cloud_base_compaction.cpp
index 2b9fd1c2e56..38dc72762e9 100644
--- a/be/src/cloud/cloud_base_compaction.cpp
+++ b/be/src/cloud/cloud_base_compaction.cpp
@@ -35,6 +35,8 @@ namespace doris {
using namespace ErrorCode;
bvar::Adder<uint64_t> base_output_size("base_compaction", "output_size");
+bvar::LatencyRecorder g_base_compaction_hold_delete_bitmap_lock_time_ms(
+ "base_compaction_hold_delete_bitmap_lock_time_ms");
CloudBaseCompaction::CloudBaseCompaction(CloudStorageEngine& engine,
CloudTabletSPtr tablet)
: CloudCompactionMixin(engine, tablet,
@@ -349,13 +351,14 @@ Status CloudBaseCompaction::modify_rowsets() {
compaction_job->set_segment_size_output_rowsets(_output_rowset->data_disk_size());
DeleteBitmapPtr output_rowset_delete_bitmap = nullptr;
+ int64_t get_delete_bitmap_lock_start_time = 0;
if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
_tablet->enable_unique_key_merge_on_write()) {
int64_t initiator = this->initiator();
RETURN_IF_ERROR(cloud_tablet()->calc_delete_bitmap_for_compaction(
_input_rowsets, _output_rowset, *_rowid_conversion,
compaction_type(),
_stats.merged_rows, initiator, output_rowset_delete_bitmap,
- _allow_delete_in_cumu_compaction));
+ _allow_delete_in_cumu_compaction,
get_delete_bitmap_lock_start_time));
LOG_INFO("update delete bitmap in CloudBaseCompaction, tablet_id={},
range=[{}-{}]",
_tablet->tablet_id(), _input_rowsets.front()->start_version(),
_input_rowsets.back()->end_version())
@@ -370,6 +373,12 @@ Status CloudBaseCompaction::modify_rowsets() {
cloud::FinishTabletJobResponse resp;
auto st = _engine.meta_mgr().commit_tablet_job(job, &resp);
+ if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
+ _tablet->enable_unique_key_merge_on_write()) {
+ int64_t hold_delete_bitmap_lock_time_ms =
+ (MonotonicMicros() - get_delete_bitmap_lock_start_time) / 1000;
+ g_base_compaction_hold_delete_bitmap_lock_time_ms <<
hold_delete_bitmap_lock_time_ms;
+ }
if (!st.ok()) {
if (resp.status().code() == cloud::TABLET_NOT_FOUND) {
cloud_tablet()->clear_cache();
diff --git a/be/src/cloud/cloud_cumulative_compaction.cpp
b/be/src/cloud/cloud_cumulative_compaction.cpp
index 097bdc9bf4a..c0aefe33a27 100644
--- a/be/src/cloud/cloud_cumulative_compaction.cpp
+++ b/be/src/cloud/cloud_cumulative_compaction.cpp
@@ -37,6 +37,8 @@ namespace doris {
using namespace ErrorCode;
bvar::Adder<uint64_t> cumu_output_size("cumu_compaction", "output_size");
+bvar::LatencyRecorder g_cu_compaction_hold_delete_bitmap_lock_time_ms(
+ "cu_compaction_hold_delete_bitmap_lock_time_ms");
CloudCumulativeCompaction::CloudCumulativeCompaction(CloudStorageEngine&
engine,
CloudTabletSPtr tablet)
@@ -289,12 +291,13 @@ Status CloudCumulativeCompaction::modify_rowsets() {
DeleteBitmapPtr output_rowset_delete_bitmap = nullptr;
int64_t initiator = this->initiator();
+ int64_t get_delete_bitmap_lock_start_time = 0;
if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
_tablet->enable_unique_key_merge_on_write()) {
RETURN_IF_ERROR(cloud_tablet()->calc_delete_bitmap_for_compaction(
_input_rowsets, _output_rowset, *_rowid_conversion,
compaction_type(),
_stats.merged_rows, initiator, output_rowset_delete_bitmap,
- _allow_delete_in_cumu_compaction));
+ _allow_delete_in_cumu_compaction,
get_delete_bitmap_lock_start_time));
LOG_INFO("update delete bitmap in CloudCumulativeCompaction,
tablet_id={}, range=[{}-{}]",
_tablet->tablet_id(), _input_rowsets.front()->start_version(),
_input_rowsets.back()->end_version())
@@ -317,6 +320,12 @@ Status CloudCumulativeCompaction::modify_rowsets() {
});
cloud::FinishTabletJobResponse resp;
auto st = _engine.meta_mgr().commit_tablet_job(job, &resp);
+ if (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
+ _tablet->enable_unique_key_merge_on_write()) {
+ int64_t hold_delete_bitmap_lock_time_ms =
+ (MonotonicMicros() - get_delete_bitmap_lock_start_time) / 1000;
+ g_cu_compaction_hold_delete_bitmap_lock_time_ms <<
hold_delete_bitmap_lock_time_ms;
+ }
if (resp.has_alter_version()) {
(static_cast<CloudTablet*>(_tablet.get()))->set_alter_version(resp.alter_version());
}
diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index c044b8361b7..75852fbe92c 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -927,7 +927,7 @@ Status CloudTablet::calc_delete_bitmap_for_compaction(
const std::vector<RowsetSharedPtr>& input_rowsets, const
RowsetSharedPtr& output_rowset,
const RowIdConversion& rowid_conversion, ReaderType compaction_type,
int64_t merged_rows,
int64_t initiator, DeleteBitmapPtr& output_rowset_delete_bitmap,
- bool allow_delete_in_cumu_compaction) {
+ bool allow_delete_in_cumu_compaction, int64_t&
get_delete_bitmap_lock_start_time) {
output_rowset_delete_bitmap = std::make_shared<DeleteBitmap>(tablet_id());
std::unique_ptr<RowLocationSet> missed_rows;
if ((config::enable_missing_rows_correctness_check ||
@@ -980,6 +980,7 @@ Status CloudTablet::calc_delete_bitmap_for_compaction(
RETURN_IF_ERROR(_engine.meta_mgr().get_delete_bitmap_update_lock(
*this, COMPACTION_DELETE_BITMAP_LOCK_ID, initiator));
int64_t t2 = MonotonicMicros();
+ get_delete_bitmap_lock_start_time = t2;
RETURN_IF_ERROR(_engine.meta_mgr().sync_tablet_rowsets(this));
int64_t t3 = MonotonicMicros();
diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h
index ee3fdc35c6a..8052c39f3a7 100644
--- a/be/src/cloud/cloud_tablet.h
+++ b/be/src/cloud/cloud_tablet.h
@@ -220,13 +220,11 @@ public:
Status save_delete_bitmap_to_ms(int64_t cur_version, int64_t txn_id,
DeleteBitmapPtr delete_bitmap, int64_t
next_visible_version);
- Status calc_delete_bitmap_for_compaction(const
std::vector<RowsetSharedPtr>& input_rowsets,
- const RowsetSharedPtr&
output_rowset,
- const RowIdConversion&
rowid_conversion,
- ReaderType compaction_type,
int64_t merged_rows,
- int64_t initiator,
- DeleteBitmapPtr&
output_rowset_delete_bitmap,
- bool
allow_delete_in_cumu_compaction);
+ Status calc_delete_bitmap_for_compaction(
+ const std::vector<RowsetSharedPtr>& input_rowsets, const
RowsetSharedPtr& output_rowset,
+ const RowIdConversion& rowid_conversion, ReaderType
compaction_type,
+ int64_t merged_rows, int64_t initiator, DeleteBitmapPtr&
output_rowset_delete_bitmap,
+ bool allow_delete_in_cumu_compaction, int64_t&
get_delete_bitmap_lock_start_time);
// Find the missed versions until the spec_version.
//
diff --git a/be/src/runtime/routine_load/routine_load_task_executor.cpp
b/be/src/runtime/routine_load/routine_load_task_executor.cpp
index 8db18d429e7..2cd5f16929c 100644
--- a/be/src/runtime/routine_load/routine_load_task_executor.cpp
+++ b/be/src/runtime/routine_load/routine_load_task_executor.cpp
@@ -62,6 +62,9 @@ using namespace ErrorCode;
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(routine_load_task_count,
MetricUnit::NOUNIT);
+bvar::LatencyRecorder
g_routine_load_commit_and_publish_latency_ms("routine_load",
+
"commit_and_publish_ms");
+
RoutineLoadTaskExecutor::RoutineLoadTaskExecutor(ExecEnv* exec_env) :
_exec_env(exec_env) {
REGISTER_HOOK_METRIC(routine_load_task_count, [this]() {
// std::lock_guard<std::mutex> l(_lock);
@@ -457,7 +460,10 @@ void
RoutineLoadTaskExecutor::exec_task(std::shared_ptr<StreamLoadContext> ctx,
consumer_pool->return_consumers(consumer_grp.get());
// commit txn
+ int64_t commit_and_publish_start_time = MonotonicNanos();
HANDLE_ERROR(_exec_env->stream_load_executor()->commit_txn(ctx.get()),
"commit failed");
+ g_routine_load_commit_and_publish_latency_ms
+ << (MonotonicNanos() - commit_and_publish_start_time) / 1000000;
// commit kafka offset
switch (ctx->load_src_type) {
case TLoadSourceType::KAFKA: {
diff --git a/cloud/src/common/bvars.cpp b/cloud/src/common/bvars.cpp
index fe887760f7f..6eb4c31a670 100644
--- a/cloud/src/common/bvars.cpp
+++ b/cloud/src/common/bvars.cpp
@@ -117,6 +117,18 @@ bvar::Adder<int64_t> g_bvar_txn_kv_commit_error_counter;
bvar::Window<bvar::Adder<int64_t> >
g_bvar_txn_kv_commit_error_counter_minute("txn_kv", "commit_error",
&g_bvar_txn_kv_commit_error_counter, 60);
bvar::Adder<int64_t> g_bvar_txn_kv_commit_conflict_counter;
bvar::Window<bvar::Adder<int64_t> >
g_bvar_txn_kv_commit_conflict_counter_minute("txn_kv", "commit_conflict",
&g_bvar_txn_kv_commit_conflict_counter, 60);
+bvar::Adder<int64_t> g_bvar_delete_bitmap_lock_txn_put_conflict_counter;
+bvar::Window<bvar::Adder<int64_t> >
g_bvar_delete_bitmap_lock_txn_put_conflict_counter_minute("delete_bitmap_lock",
"txn_put_conflict", &g_bvar_delete_bitmap_lock_txn_put_conflict_counter, 60);
+bvar::Adder<int64_t>
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_fail_counter;
+bvar::Window<bvar::Adder<int64_t> >
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_fail_counter_minute("delete_bitmap_lock",
"txn_remove_conflict_by_fail",
&g_bvar_delete_bitmap_lock_txn_remove_conflict_by_fail_counter, 60);
+bvar::Adder<int64_t>
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter;
+bvar::Window<bvar::Adder<int64_t> >
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter_minute("delete_bitmap_lock",
"txn_remove_conflict_by_load",
&g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter, 60);
+bvar::Adder<int64_t>
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_commit_counter;
+bvar::Window<bvar::Adder<int64_t> >
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_commit_counter_minute("delete_bitmap_lock",
"txn_remove_conflict_by_compaction_commit",
&g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_commit_counter,
60);
+bvar::Adder<int64_t>
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_lease_counter;
+bvar::Window<bvar::Adder<int64_t> >
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_lease_counter_minute("delete_bitmap_lock",
"txn_remove_conflict_by_compaction_lease",
&g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_lease_counter, 60);
+bvar::Adder<int64_t>
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_abort_counter;
+bvar::Window<bvar::Adder<int64_t> >
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_abort_counter_minute("delete_bitmap_lock",
"txn_remove_conflict_by_compaction_abort",
&g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_abort_counter, 60);
// fdb's bvars
const int64_t BVAR_FDB_INVALID_VALUE = -99999999L;
diff --git a/cloud/src/common/bvars.h b/cloud/src/common/bvars.h
index 7f616615394..dce05dc3c57 100644
--- a/cloud/src/common/bvars.h
+++ b/cloud/src/common/bvars.h
@@ -189,6 +189,16 @@ extern bvar::Adder<int64_t>
g_bvar_txn_kv_commit_error_counter;
extern bvar::Adder<int64_t> g_bvar_txn_kv_commit_conflict_counter;
extern bvar::Adder<int64_t> g_bvar_txn_kv_get_count_normalized;
+extern bvar::Adder<int64_t> g_bvar_delete_bitmap_lock_txn_put_conflict_counter;
+extern bvar::Adder<int64_t>
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_fail_counter;
+extern bvar::Adder<int64_t>
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter;
+extern bvar::Adder<int64_t>
+
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_commit_counter;
+extern bvar::Adder<int64_t>
+
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_lease_counter;
+extern bvar::Adder<int64_t>
+
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_abort_counter;
+
extern const int64_t BVAR_FDB_INVALID_VALUE;
extern bvar::Status<int64_t> g_bvar_fdb_client_count;
extern bvar::Status<int64_t> g_bvar_fdb_configuration_coordinators_count;
diff --git a/cloud/src/meta-service/meta_service.cpp
b/cloud/src/meta-service/meta_service.cpp
index f5b904a6f66..2de89f26786 100644
--- a/cloud/src/meta-service/meta_service.cpp
+++ b/cloud/src/meta-service/meta_service.cpp
@@ -2312,6 +2312,9 @@ void
MetaServiceImpl::update_delete_bitmap(google::protobuf::RpcController* cont
total_txn_size += txn->approximate_bytes();
total_txn_count++;
if (err != TxnErrorCode::TXN_OK) {
+ if (err == TxnErrorCode::TXN_CONFLICT) {
+ g_bvar_delete_bitmap_lock_txn_put_conflict_counter << 1;
+ }
code = cast_as<ErrCategory::COMMIT>(err);
ss << "failed to update delete bitmap, err=" << err << " tablet_id="
<< tablet_id
<< " lock_id=" << request->lock_id() << " initiator=" <<
request->initiator()
@@ -2790,12 +2793,18 @@ void MetaServiceImpl::get_delete_bitmap_update_lock_v2(
request->lock_id() == COMPACTION_DELETE_BITMAP_LOCK_ID &&
config::delete_bitmap_enable_retry_txn_conflict &&
first_retry) {
// if err is TXN_CONFLICT, and the lock id is -1, do a fast retry
+ if (err == TxnErrorCode::TXN_CONFLICT) {
+ g_bvar_delete_bitmap_lock_txn_put_conflict_counter << 1;
+ }
LOG(INFO) << "fast retry to get_delete_bitmap_update_lock,
tablet_id="
<< request->table_id() << " lock_id=" <<
request->lock_id()
<< ", initiator=" << request->initiator() << ", err=" <<
err;
first_retry = false;
continue;
} else {
+ if (err == TxnErrorCode::TXN_CONFLICT) {
+ g_bvar_delete_bitmap_lock_txn_put_conflict_counter << 1;
+ }
code = cast_as<ErrCategory::COMMIT>(err);
ss << "failed to get_delete_bitmap_update_lock, lock_id=" <<
request->lock_id()
<< ", initiator=" << request->initiator() << ", err=" << err;
@@ -3176,6 +3185,9 @@ void MetaServiceImpl::remove_delete_bitmap_update_lock_v2(
}
err = txn->commit();
if (err != TxnErrorCode::TXN_OK) {
+ if (err == TxnErrorCode::TXN_CONFLICT) {
+ g_bvar_delete_bitmap_lock_txn_remove_conflict_by_fail_counter << 1;
+ }
code = cast_as<ErrCategory::COMMIT>(err);
ss << "failed to remove delete bitmap tablet lock , err=" << err;
msg = ss.str();
diff --git a/cloud/src/meta-service/meta_service_job.cpp
b/cloud/src/meta-service/meta_service_job.cpp
index f4897b6ed44..b00d834e322 100644
--- a/cloud/src/meta-service/meta_service_job.cpp
+++ b/cloud/src/meta-service/meta_service_job.cpp
@@ -25,6 +25,7 @@
#include <cstddef>
#include <sstream>
+#include "common/bvars.h"
#include "common/config.h"
#include "common/logging.h"
#include "common/util.h"
@@ -1561,18 +1562,29 @@ void
MetaServiceImpl::finish_tablet_job(::google::protobuf::RpcController* contr
recorded_job.ParseFromString(job_val);
VLOG_DEBUG << "get tablet job, tablet_id=" << tablet_id
<< " job=" << proto_to_json(recorded_job);
+ FinishTabletJobRequest_Action action = request->action();
- std::unique_ptr<int, std::function<void(int*)>> defer_commit(
- (int*)0x01, [&ss, &txn, &code, &msg, &need_commit](int*) {
- if (!need_commit) return;
- TxnErrorCode err = txn->commit();
- if (err != TxnErrorCode::TXN_OK) {
- code = cast_as<ErrCategory::COMMIT>(err);
- ss << "failed to commit job kv, err=" << err;
- msg = ss.str();
- return;
+ std::unique_ptr<int, std::function<void(int*)>> defer_commit((int*)0x01,
[&ss, &txn, &code,
+
&msg, &need_commit,
+
&action](int*) {
+ if (!need_commit) return;
+ TxnErrorCode err = txn->commit();
+ if (err != TxnErrorCode::TXN_OK) {
+ if (err == TxnErrorCode::TXN_CONFLICT) {
+ if (action == FinishTabletJobRequest::COMMIT) {
+
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_commit_counter << 1;
+ } else if (action == FinishTabletJobRequest::LEASE) {
+
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_lease_counter << 1;
+ } else if (action == FinishTabletJobRequest::ABORT) {
+
g_bvar_delete_bitmap_lock_txn_remove_conflict_by_compaction_abort_counter << 1;
}
- });
+ }
+ code = cast_as<ErrCategory::COMMIT>(err);
+ ss << "failed to commit job kv, err=" << err;
+ msg = ss.str();
+ return;
+ }
+ });
// Process compaction commit
if (!request->job().compaction().empty()) {
diff --git a/cloud/src/meta-service/meta_service_txn.cpp
b/cloud/src/meta-service/meta_service_txn.cpp
index 490c7bb616b..7f009004003 100644
--- a/cloud/src/meta-service/meta_service_txn.cpp
+++ b/cloud/src/meta-service/meta_service_txn.cpp
@@ -1379,6 +1379,9 @@ void commit_txn_immediately(
// Finally we are done...
err = txn->commit();
if (err != TxnErrorCode::TXN_OK) {
+ if (err == TxnErrorCode::TXN_CONFLICT) {
+ g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter
<< 1;
+ }
code = cast_as<ErrCategory::COMMIT>(err);
ss << "failed to commit kv txn, txn_id=" << txn_id << " err=" <<
err;
msg = ss.str();
@@ -1893,6 +1896,9 @@ void commit_txn_eventually(
err = txn->commit();
if (err != TxnErrorCode::TXN_OK) {
+ if (err == TxnErrorCode::TXN_CONFLICT) {
+ g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter
<< 1;
+ }
code = cast_as<ErrCategory::COMMIT>(err);
ss << "failed to commit kv txn, txn_id=" << txn_id << " err=" <<
err;
msg = ss.str();
@@ -2482,6 +2488,9 @@ void commit_txn_with_sub_txn(const CommitTxnRequest*
request, CommitTxnResponse*
// Finally we are done...
err = txn->commit();
if (err != TxnErrorCode::TXN_OK) {
+ if (err == TxnErrorCode::TXN_CONFLICT) {
+ g_bvar_delete_bitmap_lock_txn_remove_conflict_by_load_counter << 1;
+ }
if (err == TxnErrorCode::TXN_VALUE_TOO_LARGE || err ==
TxnErrorCode::TXN_BYTES_TOO_LARGE) {
size_t max_size = 0, max_num_segments = 0,
min_num_segments = std::numeric_limits<size_t>::max(),
avg_num_segments = 0;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
index 40e5bb834df..520539d0704 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java
@@ -156,9 +156,50 @@ public class CloudGlobalTransactionMgr implements
GlobalTransactionMgrIface {
private static final Logger LOG =
LogManager.getLogger(CloudGlobalTransactionMgr.class);
private static final String NOT_SUPPORTED_MSG = "Not supported in cloud
mode";
+ class CommitCostTimeStatistic {
+ long waitCommitLockCostTimeMs = 0;
+ long waitDeleteBitmapLockCostTimeMs = 0;
+ long calculateDeleteBitmapCostTimeMs = 0;
+
+ long commitToMsCostTimeMs = 0;
+
+ public long getCommitToMsCostTimeMs() {
+ return commitToMsCostTimeMs;
+ }
+
+ public void setCommitToMsCostTimeMs(long commitToMsCostTimeMs) {
+ this.commitToMsCostTimeMs = commitToMsCostTimeMs;
+ }
+
+ public long getCalculateDeleteBitmapCostTimeMs() {
+ return calculateDeleteBitmapCostTimeMs;
+ }
+
+ public void setCalculateDeleteBitmapCostTimeMs(long
calculateDeleteBitmapCostTimeMs) {
+ this.calculateDeleteBitmapCostTimeMs =
calculateDeleteBitmapCostTimeMs;
+ }
+
+ public long getWaitCommitLockCostTimeMs() {
+ return waitCommitLockCostTimeMs;
+ }
+
+ public void setWaitCommitLockCostTimeMs(long waitCommitLockCostTimeMs)
{
+ this.waitCommitLockCostTimeMs = waitCommitLockCostTimeMs;
+ }
+
+ public long getWaitDeleteBitmapLockCostTimeMs() {
+ return waitDeleteBitmapLockCostTimeMs;
+ }
+
+ public void setWaitDeleteBitmapLockCostTimeMs(long
waitDeleteBitmapLockCostTimeMs) {
+ this.waitDeleteBitmapLockCostTimeMs =
waitDeleteBitmapLockCostTimeMs;
+ }
+ }
+
private TxnStateCallbackFactory callbackFactory;
private final Map<Long, Long> subTxnIdToTxnId = new ConcurrentHashMap<>();
private Map<Long, AtomicInteger> waitToCommitTxnCountMap = new
ConcurrentHashMap<>();
+ private Map<Long, CommitCostTimeStatistic> commitCostTimeStatisticMap =
new ConcurrentHashMap<>();
// dbId -> tableId -> txnId
private Map<Long, Map<Long, Long>> lastTxnIdMap = Maps.newConcurrentMap();
@@ -2140,6 +2181,15 @@ public class CloudGlobalTransactionMgr implements
GlobalTransactionMgrIface {
}
}
+ @Override
+ public int getQueueLength() {
+ int count = 0;
+ for (Map.Entry<Long, AtomicInteger> entry :
waitToCommitTxnCountMap.entrySet()) {
+ count += entry.getValue().get();
+ }
+ return count;
+ }
+
private void decreaseWaitingLockCount(List<Table> tableList) {
for (int i = 0; i < tableList.size(); i++) {
long tableId = tableList.get(i).getId();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
index 3e216739720..4e3257c3822 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
@@ -161,6 +161,11 @@ public final class MetricRepo {
public static Histogram HISTO_COMMIT_AND_PUBLISH_LATENCY;
+ public static Histogram HISTO_GET_DELETE_BITMAP_UPDATE_LOCK_LATENCY;
+ public static Histogram HISTO_GET_COMMIT_LOCK_LATENCY;
+ public static Histogram HISTO_CALCULATE_DELETE_BITMAP_LATENCY;
+ public static Histogram HISTO_COMMIT_TO_MS_LATENCY;
+
// Catlaog/Database/Table num
public static GaugeMetric<Integer> GAUGE_CATALOG_NUM;
public static GaugeMetric<Integer> GAUGE_INTERNAL_DATABASE_NUM;
@@ -589,6 +594,24 @@ public final class MetricRepo {
HISTO_COMMIT_AND_PUBLISH_LATENCY = METRIC_REGISTER.histogram(
MetricRegistry.name("txn_commit_and_publish", "latency",
"ms"));
+ GaugeMetric<Integer> commitQueueLength = new
GaugeMetric<Integer>("commit_queue_length",
+ MetricUnit.NOUNIT, "commit queue length") {
+ @Override
+ public Integer getValue() {
+ return
Env.getCurrentEnv().getGlobalTransactionMgr().getQueueLength();
+ }
+ };
+ DORIS_METRIC_REGISTER.addMetrics(commitQueueLength);
+
+ HISTO_GET_DELETE_BITMAP_UPDATE_LOCK_LATENCY =
METRIC_REGISTER.histogram(
+ MetricRegistry.name("get_delete_bitmap_update_lock",
"latency", "ms"));
+ HISTO_GET_COMMIT_LOCK_LATENCY = METRIC_REGISTER.histogram(
+ MetricRegistry.name("get_commit_lock", "latency", "ms"));
+ HISTO_CALCULATE_DELETE_BITMAP_LATENCY = METRIC_REGISTER.histogram(
+ MetricRegistry.name("calculate_delete_bitmap", "latency",
"ms"));
+ HISTO_COMMIT_TO_MS_LATENCY = METRIC_REGISTER.histogram(
+ MetricRegistry.name("commit_to_ms", "latency", "ms"));
+
GAUGE_CATALOG_NUM = new GaugeMetric<Integer>("catalog_num",
MetricUnit.NOUNIT, "total catalog num") {
@Override
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java
b/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java
index 6552c65d412..64a91459fbe 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgr.java
@@ -999,4 +999,9 @@ public class GlobalTransactionMgr implements
GlobalTransactionMgrIface {
LOG.warn("remove sub transaction failed. db " + dbId, e);
}
}
+
+ @Override
+ public int getQueueLength() {
+ return 0;
+ }
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgrIface.java
b/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgrIface.java
index 38c8cdd5a6d..c23787eca4d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgrIface.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/transaction/GlobalTransactionMgrIface.java
@@ -206,4 +206,6 @@ public interface GlobalTransactionMgrIface extends Writable
{
public List<TransactionState> getUnFinishedPreviousLoad(long
endTransactionId,
long dbId, List<Long> tableIdList) throws UserException;
+
+ public int getQueueLength();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]