This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 25638e66da8 [enhance](meta-service)add bvar for fdb process status
(#52882)
25638e66da8 is described below
commit 25638e66da8e6221641b1f2240e455abdefdc1db
Author: koarz <[email protected]>
AuthorDate: Fri Jul 11 13:20:05 2025 +0800
[enhance](meta-service)add bvar for fdb process status (#52882)
---
cloud/src/common/bvars.cpp | 2 +
cloud/src/common/bvars.h | 2 +
cloud/src/common/metric.cpp | 70 ++++++++++++++++++++++++++
cloud/test/metric_test.cpp | 117 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 191 insertions(+)
diff --git a/cloud/src/common/bvars.cpp b/cloud/src/common/bvars.cpp
index 3ca961afffb..1e9e7c4ede4 100644
--- a/cloud/src/common/bvars.cpp
+++ b/cloud/src/common/bvars.cpp
@@ -211,6 +211,8 @@ bvar::Status<int64_t>
g_bvar_fdb_workload_transactions_started_hz("fdb_workload_
bvar::Status<int64_t>
g_bvar_fdb_workload_transactions_committed_hz("fdb_workload_transactions_committed_hz",
BVAR_FDB_INVALID_VALUE);
bvar::Status<int64_t>
g_bvar_fdb_workload_transactions_rejected_hz("fdb_workload_transactions_rejected_hz",
BVAR_FDB_INVALID_VALUE);
bvar::Status<int64_t>
g_bvar_fdb_client_thread_busyness_percent("fdb_client_thread_busyness_percent",
BVAR_FDB_INVALID_VALUE);
+mBvarStatus<int64_t> g_bvar_fdb_process_status_int("fdb_process_status_int",
{"process_id", "component", "metric"}); // integer-valued FDB per-process status metrics, labelled by process_id / component / metric
+mBvarStatus<double>
g_bvar_fdb_process_status_float("fdb_process_status_float", {"process_id",
"component", "metric"}); // floating-point counterpart of g_bvar_fdb_process_status_int, same three labels
// checker's bvars
BvarStatusWithTag<int64_t> g_bvar_checker_num_scanned("checker",
"num_scanned");
diff --git a/cloud/src/common/bvars.h b/cloud/src/common/bvars.h
index 6034afe7112..d9dfb544d1a 100644
--- a/cloud/src/common/bvars.h
+++ b/cloud/src/common/bvars.h
@@ -348,6 +348,8 @@ extern bvar::Status<int64_t>
g_bvar_fdb_workload_transactions_started_hz;
extern bvar::Status<int64_t> g_bvar_fdb_workload_transactions_committed_hz;
extern bvar::Status<int64_t> g_bvar_fdb_workload_transactions_rejected_hz;
extern bvar::Status<int64_t> g_bvar_fdb_client_thread_busyness_percent;
+extern mBvarStatus<int64_t> g_bvar_fdb_process_status_int; // FDB per-process status: integer-valued metrics
+extern mBvarStatus<double> g_bvar_fdb_process_status_float; // FDB per-process status: floating-point metrics
// checker
extern BvarStatusWithTag<long> g_bvar_checker_num_scanned;
diff --git a/cloud/src/common/metric.cpp b/cloud/src/common/metric.cpp
index a9b91c6c853..124a5f26a06 100644
--- a/cloud/src/common/metric.cpp
+++ b/cloud/src/common/metric.cpp
@@ -17,10 +17,12 @@
#include "metric.h"
+#include <glog/logging.h>
#include <rapidjson/document.h>
#include <rapidjson/encodings.h>
#include <rapidjson/error/en.h>
+#include <cstdint>
#include <memory>
#include <optional>
#include <string>
@@ -28,6 +30,7 @@
#include <vector>
#include "common/bvars.h"
+#include "common/logging.h"
#include "meta-store/txn_kv.h"
#include "meta-store/txn_kv_error.h"
@@ -134,6 +137,68 @@ static void export_fdb_status_details(const std::string&
status_str) {
DCHECK(node->value.IsDouble());
return static_cast<int64_t>(node->value.GetDouble() * NANOSECONDS);
};
+    // Exports the numeric metrics of one per-process component (e.g. "cpu", "disk",
+    // "memory") found at cluster.processes.<process_id>.<component> in the status document.
+    //
+    // Integer leaves are put into g_bvar_fdb_process_status_int and double leaves into
+    // g_bvar_fdb_process_status_float, both keyed by {process_id, component, metric}.
+    // Nested objects are flattened by joining the member names with '_', such as:
+    // {"disk": {"reads": {"counter": 123, "hz": 0}}}
+    // component is "disk", the metric names are "reads_counter" and "reads_hz".
+    // Leaves of any other type are logged with a warning and skipped.
+    auto get_process_metric = [&](std::string component) {
+        auto cluster_node = document.FindMember("cluster");
+        if (cluster_node == document.MemberEnd() || !cluster_node->value.IsObject()) return;
+        auto processes_node = cluster_node->value.FindMember("processes");
+        if (processes_node == cluster_node->value.MemberEnd() ||
+            !processes_node->value.IsObject()) {
+            return;
+        }
+        using MemberIter = decltype(processes_node->value.MemberBegin());
+
+        // Stores a scalar leaf in the bvar that matches its JSON type.
+        auto set_bvar_value = [&component](const char* process_id, const std::string& name,
+                                           MemberIter leaf) -> void {
+            if (leaf->value.IsInt64()) {
+                g_bvar_fdb_process_status_int.put({process_id, component, name},
+                                                  leaf->value.GetInt64());
+                return;
+            }
+            if (leaf->value.IsDouble()) {
+                g_bvar_fdb_process_status_float.put({process_id, component, name},
+                                                    leaf->value.GetDouble());
+                return;
+            }
+            LOG(WARNING) << fmt::format(
+                    "Get process metrics set_bvar_value input a wrong type node {}", name);
+        };
+
+        // Walks a metric node: objects recurse into their members with the member name
+        // appended to the metric name, everything else is handed to set_bvar_value.
+        auto export_node = [&set_bvar_value](auto&& self, const char* process_id,
+                                             const std::string& name, MemberIter node) -> void {
+            if (!node->value.IsObject()) {
+                set_bvar_value(process_id, name, node);
+                return;
+            }
+            for (auto child = node->value.MemberBegin(); child != node->value.MemberEnd();
+                 ++child) {
+                self(self, process_id, name + '_' + child->name.GetString(), child);
+            }
+        };
+
+        for (auto process_node = processes_node->value.MemberBegin();
+             process_node != processes_node->value.MemberEnd(); ++process_node) {
+            if (!process_node->value.IsObject()) continue;
+            auto component_node = process_node->value.FindMember(component.c_str());
+            // A process that does not report this component must not stop the export
+            // for the remaining processes, so skip it instead of returning.
+            if (component_node == process_node->value.MemberEnd() ||
+                !component_node->value.IsObject()) {
+                continue;
+            }
+            const char* process_id = process_node->name.GetString();
+            // The node itself (not its members) is passed down, so export_node can be
+            // called directly for every metric of the component.
+            for (auto metric_node = component_node->value.MemberBegin();
+                 metric_node != component_node->value.MemberEnd(); ++metric_node) {
+                export_node(export_node, process_id, metric_node->name.GetString(), metric_node);
+            }
+        }
+    };
// Configuration
g_bvar_fdb_configuration_coordinators_count.set_value(
get_value({"configuration", "coordinators_count"}));
@@ -226,6 +291,11 @@ static void export_fdb_status_details(const std::string&
status_str) {
}
}
}
+
+ // Process Status
+ get_process_metric("cpu");
+ get_process_metric("disk");
+ get_process_metric("memory");
}
void FdbMetricExporter::export_fdb_metrics(TxnKv* txn_kv) {
diff --git a/cloud/test/metric_test.cpp b/cloud/test/metric_test.cpp
index 31a2b7b3c58..81174c73924 100644
--- a/cloud/test/metric_test.cpp
+++ b/cloud/test/metric_test.cpp
@@ -172,4 +172,121 @@ TEST(MetricTest, FdbMetricExporterTest) {
ASSERT_EQ(g_bvar_fdb_machines_count.get_value(),
BVAR_FDB_INVALID_VALUE);
ASSERT_EQ(g_bvar_fdb_client_count.get_value(), BVAR_FDB_INVALID_VALUE);
}
+
+ // process status
+ {
+ g_bvar_fdb_machines_count.set_value(BVAR_FDB_INVALID_VALUE);
+ g_bvar_fdb_client_count.set_value(BVAR_FDB_INVALID_VALUE);
+
+ std::string fdb_metric_example = "./fdb_metric_example.json";
+ std::ifstream inFile(fdb_metric_example);
+
+ ASSERT_TRUE(inFile.is_open());
+ std::string fileContent((std::istreambuf_iterator<char>(inFile)),
+ std::istreambuf_iterator<char>());
+
+ std::shared_ptr<TxnKv> txn_kv = std::make_shared<MemTxnKv>();
+ std::unique_ptr<Transaction> txn;
+ ASSERT_EQ(txn_kv->create_txn(&txn), TxnErrorCode::TXN_OK);
+ txn->put("\xff\xff/status/json", fileContent);
+ ASSERT_EQ(txn->commit(), TxnErrorCode::TXN_OK);
+
+ FdbMetricExporter fdb_metric_exporter(txn_kv);
+ fdb_metric_exporter.sleep_interval_ms_ = 1;
+ fdb_metric_exporter.start();
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
+ fdb_metric_exporter.stop();
+ ASSERT_EQ(g_bvar_fdb_process_status_float.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "cpu",
"usage_cores"}),
+ 0.0012292);
+ ASSERT_EQ(g_bvar_fdb_process_status_float.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "disk",
"busy"}),
+ 0.0085999800000000001);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "disk",
"free_bytes"}),
+ 490412584960);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "disk",
"reads_counter"}),
+ 854857);
+ ASSERT_EQ(g_bvar_fdb_process_status_float.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "disk",
"reads_hz"}),
+ 0);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "disk",
"reads_sectors"}),
+ 0);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "disk",
"total_bytes"}),
+ 527295578112);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "disk",
"writes_counter"}),
+ 73765457);
+ ASSERT_EQ(g_bvar_fdb_process_status_float.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "disk",
"writes_hz"}),
+ 26.1999);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "disk",
"writes_sectors"}),
+ 1336);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "memory",
"available_bytes"}),
+ 3065090867);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "memory",
"limit_bytes"}),
+ 8589934592);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "memory",
"rss_bytes"}),
+ 46551040);
+
ASSERT_EQ(g_bvar_fdb_process_status_int.get({"09ca90b9f3f413e5816b2610ed8b465d",
"memory",
+
"unused_allocated_memory"}),
+ 655360);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"09ca90b9f3f413e5816b2610ed8b465d", "memory",
"used_bytes"}),
+ 122974208);
+
+ // test second process
+ ASSERT_EQ(g_bvar_fdb_process_status_float.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "cpu",
"usage_cores"}),
+ 0.0049765900000000004);
+ ASSERT_EQ(g_bvar_fdb_process_status_float.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "disk",
"busy"}),
+ 0.012200000000000001);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "disk",
"free_bytes"}),
+ 489160159232);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "disk",
"reads_counter"}),
+ 877107);
+ ASSERT_EQ(g_bvar_fdb_process_status_float.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "disk",
"reads_hz"}),
+ 0);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "disk",
"reads_sectors"}),
+ 0);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "disk",
"total_bytes"}),
+ 527295578112);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "disk",
"writes_counter"}),
+ 79316112);
+ ASSERT_EQ(g_bvar_fdb_process_status_float.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "disk",
"writes_hz"}),
+ 30.9999);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "disk",
"writes_sectors"}),
+ 744);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "memory",
"available_bytes"}),
+ 3076787404);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "memory",
"limit_bytes"}),
+ 8589934592);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "memory",
"rss_bytes"}),
+ 72359936);
+
ASSERT_EQ(g_bvar_fdb_process_status_int.get({"0a456165f04e1ec1a2ade0ce523d54a8",
"memory",
+
"unused_allocated_memory"}),
+ 393216);
+ ASSERT_EQ(g_bvar_fdb_process_status_int.get(
+ {"0a456165f04e1ec1a2ade0ce523d54a8", "memory",
"used_bytes"}),
+ 157978624);
+ }
}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]