This is an automated email from the ASF dual-hosted git repository.
ivandika pushed a commit to branch HDDS-5713
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/HDDS-5713 by this push:
new d2cd69ac34 HDDS-12436. [DiskBalancer] Add metrics for time spent by
container move (#8001)
d2cd69ac34 is described below
commit d2cd69ac348225f9d774511e84d9b4ab2b3fc257
Author: Gargi Jaiswal <[email protected]>
AuthorDate: Sat Mar 8 16:59:42 2025 +0530
HDDS-12436. [DiskBalancer] Add metrics for time spent by container move
(#8001)
---
.../container/diskbalancer/DiskBalancerService.java | 9 +++++++++
.../diskbalancer/DiskBalancerServiceMetrics.java | 17 +++++++++++++++++
2 files changed, 26 insertions(+)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
index 968aa45494..ac1ee19123 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
@@ -392,6 +392,8 @@ private class DiskBalancerTask implements BackgroundTask {
@Override
public BackgroundTaskResult call() {
+ long startTime = Time.monotonicNow();
+ boolean moveSucceeded = true;
long containerId = containerData.getContainerID();
boolean destVolumeIncreased = false;
Path diskBalancerTmpDir = null, diskBalancerDestDir = null;
@@ -449,6 +451,7 @@ public BackgroundTaskResult call() {
metrics.incrSuccessCount(1);
metrics.incrSuccessBytes(containerSize);
} catch (IOException e) {
+ moveSucceeded = false;
if (diskBalancerTmpDir != null) {
try {
Files.deleteIfExists(diskBalancerTmpDir);
@@ -472,6 +475,12 @@ public BackgroundTaskResult call() {
}
metrics.incrFailureCount();
} finally {
+ long endTime = Time.monotonicNow();
+ if (moveSucceeded) {
+ metrics.getMoveSuccessTime().add(endTime - startTime);
+ } else {
+ metrics.getMoveFailureTime().add(endTime - startTime);
+ }
postCall();
}
return BackgroundTaskResult.EmptyTaskResult.newResult();
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerServiceMetrics.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerServiceMetrics.java
index 6f967375ec..9d6dedf5bb 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerServiceMetrics.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerServiceMetrics.java
@@ -22,6 +22,7 @@
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableRate;
/**
* Metrics related to DiskBalancer Service running on Datanode.
@@ -43,6 +44,12 @@ public final class DiskBalancerServiceMetrics {
@Metric(about = "The number of failed balance job.")
private MutableCounterLong failureCount;
+ @Metric(about = "The time spent on successful container moves.")
+ private MutableRate moveSuccessTime;
+
+ @Metric(about = "The time spent on failed container moves.")
+ private MutableRate moveFailureTime;
+
@Metric(about = "The number of total running loop")
private MutableCounterLong runningLoopCount;
@@ -122,12 +129,22 @@ public long getIdleLoopExceedsBandwidthCount() {
return idleLoopExceedsBandwidthCount.value();
}
+ public MutableRate getMoveSuccessTime() {
+ return moveSuccessTime;
+ }
+
+ public MutableRate getMoveFailureTime() {
+ return moveFailureTime;
+ }
+
@Override
public String toString() {
StringBuffer buffer = new StringBuffer();
buffer.append("successCount = " + successCount.value()).append("\t")
.append("successBytes = " + successBytes.value()).append("\t")
.append("failureCount = " + failureCount.value()).append("\t")
+ .append("moveSuccessTime = " +
moveSuccessTime.lastStat().mean()).append("\t")
+ .append("moveFailureTime = " +
moveFailureTime.lastStat().mean()).append("\t")
.append("idleLoopNoAvailableVolumePairCount = " +
idleLoopNoAvailableVolumePairCount.value()).append("\t")
.append("idleLoopExceedsBandwidthCount = " +
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]