This is an automated email from the ASF dual-hosted git repository.
siyao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 81295a53cb9 HDDS-13013. [Snapshot] Add metrics and tests for snapshot
operations. (#8436)
81295a53cb9 is described below
commit 81295a53cb943e0b60cb9eec4f480619e5668418
Author: Wei-Chiu Chuang <[email protected]>
AuthorDate: Sat Jun 7 00:22:36 2025 -0700
HDDS-13013. [Snapshot] Add metrics and tests for snapshot operations.
(#8436)
---
.../org/apache/hadoop/ozone/om/TestOmMetrics.java | 89 ++++++++++++++++++++++
.../java/org/apache/hadoop/ozone/om/OMMetrics.java | 30 ++++++++
.../org/apache/hadoop/ozone/om/OzoneManager.java | 4 +
.../request/snapshot/OMSnapshotRenameRequest.java | 7 +-
4 files changed, 129 insertions(+), 1 deletion(-)
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmMetrics.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmMetrics.java
index 683def25097..bc7d50a9007 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmMetrics.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOmMetrics.java
@@ -561,6 +561,7 @@ public void testDirectoryOps(BucketLayout bucketLayout)
throws Exception {
assertEquals(initialNumKeyRenames + expectedRenames,
getLongCounter("NumKeyRenames", omMetrics));
}
+ @SuppressWarnings("checkstyle:methodlength")
@Test
public void testSnapshotOps() throws Exception {
// This tests needs enough dataNodes to allocate the blocks for the keys.
@@ -568,12 +569,22 @@ public void testSnapshotOps() throws Exception {
MetricsRecordBuilder omMetrics = getMetrics("OMMetrics");
long initialNumSnapshotCreateFails =
getLongCounter("NumSnapshotCreateFails", omMetrics);
long initialNumSnapshotCreates = getLongCounter("NumSnapshotCreates",
omMetrics);
+ long initialNumSnapshotInfoFails = getLongCounter("NumSnapshotInfoFails",
omMetrics);
+ long initialNumSnapshotInfos = getLongCounter("NumSnapshotInfos",
omMetrics);
long initialNumSnapshotListFails = getLongCounter("NumSnapshotListFails",
omMetrics);
long initialNumSnapshotLists = getLongCounter("NumSnapshotLists",
omMetrics);
long initialNumSnapshotActive = getLongCounter("NumSnapshotActive",
omMetrics);
long initialNumSnapshotDeleted = getLongCounter("NumSnapshotDeleted",
omMetrics);
+ long initialNumSnapshotDeletes = getLongCounter("NumSnapshotDeletes",
omMetrics);
+ long initialNumSnapshotDeleteFails =
getLongCounter("NumSnapshotDeleteFails", omMetrics);
long initialNumSnapshotDiffJobs = getLongCounter("NumSnapshotDiffJobs",
omMetrics);
long initialNumSnapshotDiffJobFails =
getLongCounter("NumSnapshotDiffJobFails", omMetrics);
+ long initialNumListSnapshotDiffJobs =
getLongCounter("NumListSnapshotDiffJobs", omMetrics);
+ long initialNumListSnapshotDiffJobFails =
getLongCounter("NumListSnapshotDiffJobFails", omMetrics);
+ long initialNumCancelSnapshotDiffs =
getLongCounter("NumCancelSnapshotDiffs", omMetrics);
+ long initialNumCancelSnapshotDiffFails =
getLongCounter("NumCancelSnapshotDiffFails", omMetrics);
+ long initialNumSnapshotRenames = getLongCounter("NumSnapshotRenames",
omMetrics);
+ long initialNumSnapshotRenameFails =
getLongCounter("NumSnapshotRenameFails", omMetrics);
OmBucketInfo omBucketInfo = createBucketInfo(false);
@@ -581,6 +592,7 @@ public void testSnapshotOps() throws Exception {
String bucketName = omBucketInfo.getBucketName();
String snapshot1 = "snap1";
String snapshot2 = "snap2";
+ String snapshot3 = "snap3";
writeClient.createBucket(omBucketInfo);
@@ -624,9 +636,52 @@ public void testSnapshotOps() throws Exception {
}
}
omMetrics = getMetrics("OMMetrics");
+
assertEquals(initialNumSnapshotDiffJobs + 1,
getLongCounter("NumSnapshotDiffJobs", omMetrics));
assertEquals(initialNumSnapshotDiffJobFails,
getLongCounter("NumSnapshotDiffJobFails", omMetrics));
+ // List snapshot diff jobs
+ writeClient.listSnapshotDiffJobs(volumeName, bucketName, "", true, null,
1000);
+
+ omMetrics = getMetrics("OMMetrics");
+ assertEquals(initialNumListSnapshotDiffJobs + 1,
getLongCounter("NumListSnapshotDiffJobs", omMetrics));
+ assertEquals(initialNumListSnapshotDiffJobFails,
getLongCounter("NumListSnapshotDiffJobFails", omMetrics));
+
+ // List snapshot diff jobs: invalid bucket case.
+ assertThrows(OMException.class, () ->
+ writeClient.listSnapshotDiffJobs(volumeName, "invalidBucket", "",
true, null, 1000));
+ omMetrics = getMetrics("OMMetrics");
+ assertEquals(initialNumListSnapshotDiffJobs + 2,
getLongCounter("NumListSnapshotDiffJobs", omMetrics));
+ assertEquals(initialNumListSnapshotDiffJobFails + 1,
getLongCounter("NumListSnapshotDiffJobFails", omMetrics));
+
+ // Cancel snapshot diff
+ writeClient.cancelSnapshotDiff(volumeName, bucketName, snapshot1,
snapshot2);
+
+ omMetrics = getMetrics("OMMetrics");
+ assertEquals(initialNumCancelSnapshotDiffs + 1,
getLongCounter("NumCancelSnapshotDiffs", omMetrics));
+ assertEquals(initialNumCancelSnapshotDiffFails,
getLongCounter("NumCancelSnapshotDiffFails", omMetrics));
+
+ // Cancel snapshot diff job: invalid bucket case.
+ assertThrows(OMException.class, () ->
+ writeClient.cancelSnapshotDiff(volumeName, "invalidBucket", snapshot1,
snapshot2));
+ omMetrics = getMetrics("OMMetrics");
+ assertEquals(initialNumCancelSnapshotDiffs + 2,
getLongCounter("NumCancelSnapshotDiffs", omMetrics));
+ assertEquals(initialNumCancelSnapshotDiffFails + 1,
getLongCounter("NumCancelSnapshotDiffFails", omMetrics));
+
+ // Get snapshot info
+ writeClient.getSnapshotInfo(volumeName, bucketName, snapshot1);
+
+ omMetrics = getMetrics("OMMetrics");
+ assertEquals(initialNumSnapshotInfos + 1,
getLongCounter("NumSnapshotInfos", omMetrics));
+ assertEquals(initialNumSnapshotInfoFails,
getLongCounter("NumSnapshotInfoFails", omMetrics));
+
+ // Get snapshot info: invalid snapshot case.
+ assertThrows(OMException.class, () ->
+ writeClient.getSnapshotInfo(volumeName, bucketName,
"invalidSnapshot"));
+ omMetrics = getMetrics("OMMetrics");
+ assertEquals(initialNumSnapshotInfos + 2,
getLongCounter("NumSnapshotInfos", omMetrics));
+ assertEquals(initialNumSnapshotInfoFails + 1,
getLongCounter("NumSnapshotInfoFails", omMetrics));
+
// List snapshots
writeClient.listSnapshot(
volumeName, bucketName, null, null, Integer.MAX_VALUE);
@@ -643,6 +698,40 @@ public void testSnapshotOps() throws Exception {
omMetrics = getMetrics("OMMetrics");
assertEquals(initialNumSnapshotLists + 2,
getLongCounter("NumSnapshotLists", omMetrics));
assertEquals(initialNumSnapshotListFails + 1,
getLongCounter("NumSnapshotListFails", omMetrics));
+
+ // Rename snapshot
+ writeClient.renameSnapshot(volumeName, bucketName, snapshot2, snapshot3);
+
+ omMetrics = getMetrics("OMMetrics");
+ assertEquals(initialNumSnapshotActive + 2,
getLongCounter("NumSnapshotActive", omMetrics));
+ assertEquals(initialNumSnapshotRenames + 1,
getLongCounter("NumSnapshotRenames", omMetrics));
+ assertEquals(initialNumSnapshotRenameFails,
getLongCounter("NumSnapshotRenameFails", omMetrics));
+
+ // Rename snapshot: invalid snapshot case.
+ assertThrows(OMException.class, () ->
writeClient.renameSnapshot(volumeName,
+ bucketName, snapshot2, snapshot3));
+ omMetrics = getMetrics("OMMetrics");
+ assertEquals(initialNumSnapshotActive + 2,
getLongCounter("NumSnapshotActive", omMetrics));
+ assertEquals(initialNumSnapshotRenames + 2,
getLongCounter("NumSnapshotRenames", omMetrics));
+ assertEquals(initialNumSnapshotRenameFails + 1,
getLongCounter("NumSnapshotRenameFails", omMetrics));
+
+ // Delete snapshot
+ writeClient.deleteSnapshot(volumeName, bucketName, snapshot3);
+
+ omMetrics = getMetrics("OMMetrics");
+ assertEquals(initialNumSnapshotActive + 1,
getLongCounter("NumSnapshotActive", omMetrics));
+ assertEquals(initialNumSnapshotDeletes + 1,
getLongCounter("NumSnapshotDeletes", omMetrics));
+ assertEquals(initialNumSnapshotDeleted + 1,
getLongCounter("NumSnapshotDeleted", omMetrics));
+ assertEquals(initialNumSnapshotDeleteFails,
getLongCounter("NumSnapshotDeleteFails", omMetrics));
+
+ // Delete snapshot: invalid snapshot case.
+ assertThrows(OMException.class, () ->
writeClient.deleteSnapshot(volumeName,
+ bucketName, snapshot3));
+ omMetrics = getMetrics("OMMetrics");
+ assertEquals(initialNumSnapshotActive + 1,
getLongCounter("NumSnapshotActive", omMetrics));
+ assertEquals(initialNumSnapshotDeletes + 2,
getLongCounter("NumSnapshotDeletes", omMetrics));
+ assertEquals(initialNumSnapshotDeleted + 1,
getLongCounter("NumSnapshotDeleted", omMetrics));
+ assertEquals(initialNumSnapshotDeleteFails + 1,
getLongCounter("NumSnapshotDeleteFails", omMetrics));
}
private OMMetadataManager mockWritePathExceptions(
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
index c294325ef2b..f69b9c4986a 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
@@ -71,10 +71,13 @@ public class OMMetrics implements OmMetadataReaderMetrics {
private @Metric MutableCounterLong numSnapshotCreates;
private @Metric MutableCounterLong numSnapshotDeletes;
private @Metric MutableCounterLong numSnapshotLists;
+ private @Metric MutableCounterLong numSnapshotRenames;
private @Metric MutableCounterLong numSnapshotDiffJobs;
private @Metric MutableCounterLong numSnapshotInfos;
private @Metric MutableCounterLong numSnapshotPurges;
private @Metric MutableCounterLong numSnapshotSetProperties;
+ private @Metric MutableCounterLong numCancelSnapshotDiffs;
+ private @Metric MutableCounterLong numListSnapshotDiffJobs;
private @Metric MutableGaugeInt numSnapshotCacheSize;
private @Metric MutableCounterLong numGetFileStatus;
@@ -138,12 +141,15 @@ public class OMMetrics implements OmMetadataReaderMetrics
{
private @Metric MutableCounterLong numOpenKeyDeleteRequestFails;
private @Metric MutableCounterLong numExpiredMPUAbortRequestFails;
private @Metric MutableCounterLong numSnapshotCreateFails;
+ private @Metric MutableCounterLong numSnapshotRenameFails;
private @Metric MutableCounterLong numSnapshotDeleteFails;
private @Metric MutableCounterLong numSnapshotListFails;
private @Metric MutableCounterLong numSnapshotDiffJobFails;
private @Metric MutableCounterLong numSnapshotInfoFails;
private @Metric MutableCounterLong numSnapshotPurgeFails;
private @Metric MutableCounterLong numSnapshotSetPropertyFails;
+ private @Metric MutableCounterLong numCancelSnapshotDiffFails;
+ private @Metric MutableCounterLong numListSnapshotDiffJobFails;
private @Metric MutableCounterLong numSnapshotActive;
private @Metric MutableCounterLong numSnapshotDeleted;
@@ -477,6 +483,14 @@ public void incNumSnapshotCreateFails() {
numSnapshotCreateFails.incr();
}
+ public void incNumSnapshotRenames() {
+ numSnapshotRenames.incr();
+ }
+
+ public void incNumSnapshotRenameFails() {
+ numSnapshotRenameFails.incr();
+ }
+
public void incNumSnapshotDeletes() {
numSnapshotDeletes.incr();
}
@@ -505,6 +519,22 @@ public void incNumSnapshotDiffJobs() {
numSnapshotDiffJobs.incr();
}
+ public void incNumCancelSnapshotDiffs() {
+ numCancelSnapshotDiffs.incr();
+ }
+
+ public void incNumCancelSnapshotDiffJobFails() {
+ numCancelSnapshotDiffFails.incr();
+ }
+
+ public void incNumListSnapshotDiffJobs() {
+ numListSnapshotDiffJobs.incr();
+ }
+
+ public void incNumListSnapshotDiffJobFails() {
+ numListSnapshotDiffJobFails.incr();
+ }
+
public void incNumSnapshotListFails() {
numSnapshotListFails.incr();
}
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index ec2b9964f53..44ac2c9bf59 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -5060,6 +5060,7 @@ public CancelSnapshotDiffResponse
cancelSnapshotDiff(String volume,
auditMap.put(OzoneConsts.BUCKET, bucket);
auditMap.put(OzoneConsts.FROM_SNAPSHOT, fromSnapshot);
auditMap.put(OzoneConsts.TO_SNAPSHOT, toSnapshot);
+ metrics.incNumCancelSnapshotDiffs();
try {
ResolvedBucket resolvedBucket = this.resolveBucketLink(Pair.of(volume,
bucket), false);
@@ -5070,6 +5071,7 @@ public CancelSnapshotDiffResponse
cancelSnapshotDiff(String volume,
return omSnapshotManager.cancelSnapshotDiff(resolvedBucket.realVolume(),
resolvedBucket.realBucket(),
fromSnapshot, toSnapshot);
} catch (Exception ex) {
+ metrics.incNumCancelSnapshotDiffJobFails();
auditSuccess = false;
AUDIT.logReadFailure(buildAuditMessageForFailure(OMAction.CANCEL_SNAPSHOT_DIFF_JOBS,
auditMap, ex));
@@ -5095,6 +5097,7 @@ public ListSnapshotDiffJobResponse listSnapshotDiffJobs(
auditMap.put(OzoneConsts.VOLUME, volume);
auditMap.put(OzoneConsts.BUCKET, bucket);
auditMap.put(OzoneConsts.JOB_STATUS, jobStatus);
+ metrics.incNumListSnapshotDiffJobs();
try {
ResolvedBucket resolvedBucket = this.resolveBucketLink(Pair.of(volume,
bucket), false);
@@ -5105,6 +5108,7 @@ public ListSnapshotDiffJobResponse listSnapshotDiffJobs(
return
omSnapshotManager.getSnapshotDiffList(resolvedBucket.realVolume(),
resolvedBucket.realBucket(),
jobStatus, listAllStatus, prevSnapshotDiffJob, maxListResult);
} catch (Exception ex) {
+ metrics.incNumListSnapshotDiffJobFails();
auditSuccess = false;
AUDIT.logReadFailure(buildAuditMessageForFailure(OMAction.LIST_SNAPSHOT_DIFF_JOBS,
auditMap, ex));
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotRenameRequest.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotRenameRequest.java
index 5a25d859472..7a4cdc640dc 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotRenameRequest.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/request/snapshot/OMSnapshotRenameRequest.java
@@ -31,6 +31,7 @@
import org.apache.hadoop.ozone.OmUtils;
import org.apache.hadoop.ozone.audit.AuditLogger;
import org.apache.hadoop.ozone.audit.OMAction;
+import org.apache.hadoop.ozone.om.OMMetrics;
import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
import org.apache.hadoop.ozone.om.OzoneManager;
import org.apache.hadoop.ozone.om.ResolvedBucket;
@@ -110,6 +111,9 @@ public OMRequest preExecute(OzoneManager ozoneManager)
throws IOException {
@Override
public OMClientResponse validateAndUpdateCache(OzoneManager ozoneManager,
ExecutionContext context) {
+ OMMetrics omMetrics = ozoneManager.getMetrics();
+ omMetrics.incNumSnapshotRenames();
+
boolean acquiredBucketLock = false;
boolean acquiredSnapshotOldLock = false;
boolean acquiredSnapshotNewLock = false;
@@ -153,7 +157,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager
ozoneManager, Execut
String snapshotNewTableKey = SnapshotInfo.getTableKey(volumeName,
bucketName, snapshotNewName);
if
(omMetadataManager.getSnapshotInfoTable().isExist(snapshotNewTableKey)) {
- throw new OMException("Snapshot with name " + snapshotNewName +
"already exist",
+ throw new OMException("Snapshot with name " + snapshotNewName + "
already exist",
FILE_ALREADY_EXISTS);
}
@@ -200,6 +204,7 @@ public OMClientResponse validateAndUpdateCache(OzoneManager
ozoneManager, Execut
omResponse.build(), snapshotOldTableKey, snapshotNewTableKey,
snapshotOldInfo);
} catch (IOException | InvalidPathException ex) {
+ omMetrics.incNumSnapshotRenameFails();
exception = ex;
omClientResponse = new OMSnapshotRenameResponse(
createErrorOMResponse(omResponse, exception));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]