This is an automated email from the ASF dual-hosted git repository.
weichiu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 37589a4e3d4 HDDS-12887. Added Deletion Progress metrics to OM Web UI.
(#8655)
37589a4e3d4 is described below
commit 37589a4e3d4b2e05553587b518370282751056c2
Author: SaketaChalamchala <[email protected]>
AuthorDate: Thu Jul 31 17:34:19 2025 -0700
HDDS-12887. Added Deletion Progress metrics to OM Web UI. (#8655)
Co-authored-by: Wei-Chiu Chuang <[email protected]>
---
.../hadoop/hdds/utils/BackgroundService.java | 1 +
.../hadoop/ozone/om/DeletingServiceMetrics.java | 127 ++++++++++++++++
.../org/apache/hadoop/ozone/om/KeyManagerImpl.java | 7 +-
.../hadoop/ozone/om/PendingKeysDeletion.java | 16 +-
.../ozone/om/service/KeyDeletingService.java | 96 ++++++++++--
.../webapps/ozoneManager/om-overview.html | 61 +++++++-
.../resources/webapps/ozoneManager/ozoneManager.js | 43 ++++--
.../ozone/om/service/TestKeyDeletingService.java | 167 ++++++++++++++++++++-
8 files changed, 484 insertions(+), 34 deletions(-)
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java
index f5dc647871a..ca45e73ac1b 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/BackgroundService.java
@@ -108,6 +108,7 @@ public void runPeriodicalTaskNow() throws Exception {
while (!tasks.isEmpty()) {
tasks.poll().call();
}
+ execTaskCompletion();
}
// start service
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/DeletingServiceMetrics.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/DeletingServiceMetrics.java
index baa4a34e774..09f097bd5a6 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/DeletingServiceMetrics.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/DeletingServiceMetrics.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.ozone.om;
import com.google.common.annotations.VisibleForTesting;
+import java.time.Instant;
+import java.util.concurrent.TimeUnit;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
@@ -68,6 +70,45 @@ public final class DeletingServiceMetrics {
@Metric("Total no. of rename entries purged")
private MutableGaugeLong numRenameEntriesPurged;
+ /*
+ * Key deletion metrics in the last 24 hours.
+ */
+ private static final long METRIC_RESET_INTERVAL = TimeUnit.DAYS.toSeconds(1);
+ @Metric("Last time the metrics were reset")
+ private MutableGaugeLong metricsResetTimeStamp;
+ @Metric("No. of reclaimed keys in the last interval")
+ private MutableGaugeLong keysReclaimedInInterval;
+ @Metric("Replicated size of reclaimed keys in the last interval (bytes)")
+ private MutableGaugeLong reclaimedSizeInInterval;
+
+ /*
+ * Deletion service state metrics.
+ */
+ @Metric("Key Deleting Service last run timestamp in ms")
+ private MutableGaugeLong kdsLastRunTimestamp;
+ @Metric("Key Deleting Service current run timestamp in ms")
+ private MutableGaugeLong kdsCurRunTimestamp;
+
+ /*
+ * Deletion service last run metrics.
+ */
+ @Metric("AOS: No. of reclaimed keys in the last run")
+ private MutableGaugeLong aosKeysReclaimedLast;
+ @Metric("AOS: Replicated size of reclaimed keys in the last run (bytes)")
+ private MutableGaugeLong aosReclaimedSizeLast;
+ @Metric("AOS: No. of iterated keys in the last run")
+ private MutableGaugeLong aosKeysIteratedLast;
+ @Metric("AOS: No. of not reclaimable keys the last run")
+ private MutableGaugeLong aosKeysNotReclaimableLast;
+ @Metric("Snapshot: No. of reclaimed keys in the last run")
+ private MutableGaugeLong snapKeysReclaimedLast;
+ @Metric("Snapshot: Replicated size of reclaimed keys in the last run
(bytes)")
+ private MutableGaugeLong snapReclaimedSizeLast;
+ @Metric("Snapshot: No. of iterated keys in the last run")
+ private MutableGaugeLong snapKeysIteratedLast;
+ @Metric("Snapshot: No. of not reclaimable keys the last run")
+ private MutableGaugeLong snapKeysNotReclaimableLast;
+
private DeletingServiceMetrics() {
this.registry = new MetricsRegistry(METRICS_SOURCE_NAME);
}
@@ -160,6 +201,92 @@ public void incrNumRenameEntriesPurged(long
renameEntriesPurged) {
this.numRenameEntriesPurged.incr(renameEntriesPurged);
}
+ public void setKdsLastRunTimestamp(long timestamp) {
+ this.kdsLastRunTimestamp.set(timestamp);
+ }
+
+ public void setKdsCurRunTimestamp(long timestamp) {
+ this.kdsCurRunTimestamp.set(timestamp);
+ }
+
+ private void resetMetrics() {
+ this.keysReclaimedInInterval.set(0);
+ this.reclaimedSizeInInterval.set(0);
+ }
+
+ private void checkAndResetMetrics() {
+ long currentTime = Instant.now().getEpochSecond();
+ if (metricsResetTimeStamp.value() == 0) {
+ this.metricsResetTimeStamp.set(currentTime);
+ }
+ if (currentTime - metricsResetTimeStamp.value() > METRIC_RESET_INTERVAL) {
+ resetMetrics();
+ this.metricsResetTimeStamp.set(currentTime);
+ }
+ }
+
+ public void updateIntervalCumulativeMetrics(long keysReclaimed, long
replicatedSizeBytes) {
+ checkAndResetMetrics();
+ this.keysReclaimedInInterval.incr(keysReclaimed);
+ this.reclaimedSizeInInterval.incr(replicatedSizeBytes);
+ }
+
+ public long getKeysReclaimedInInterval() {
+ return keysReclaimedInInterval.value();
+ }
+
+ public long getReclaimedSizeInInterval() {
+ return reclaimedSizeInInterval.value();
+ }
+
+ public void updateAosLastRunMetrics(long keysReclaimed, long
replicatedSizeBytes, long iteratedKeys,
+ long notReclaimableKeys) {
+ this.aosKeysReclaimedLast.set(keysReclaimed);
+ this.aosReclaimedSizeLast.set(replicatedSizeBytes);
+ this.aosKeysIteratedLast.set(iteratedKeys);
+ this.aosKeysNotReclaimableLast.set(notReclaimableKeys);
+ }
+
+ public long getAosKeysReclaimedLast() {
+ return aosKeysReclaimedLast.value();
+ }
+
+ public long getAosReclaimedSizeLast() {
+ return aosReclaimedSizeLast.value();
+ }
+
+ public long getAosKeysIteratedLast() {
+ return aosKeysIteratedLast.value();
+ }
+
+ public long getAosKeysNotReclaimableLast() {
+ return aosKeysNotReclaimableLast.value();
+ }
+
+ public void updateSnapLastRunMetrics(long keysReclaimed, long
replicatedSizeBytes, long iteratedKeys,
+ long notReclaimableKeys) {
+ this.snapKeysReclaimedLast.set(keysReclaimed);
+ this.snapReclaimedSizeLast.set(replicatedSizeBytes);
+ this.snapKeysIteratedLast.set(iteratedKeys);
+ this.snapKeysNotReclaimableLast.set(notReclaimableKeys);
+ }
+
+ public long getSnapKeysReclaimedLast() {
+ return snapKeysReclaimedLast.value();
+ }
+
+ public long getSnapReclaimedSizeLast() {
+ return snapReclaimedSizeLast.value();
+ }
+
+ public long getSnapKeysIteratedLast() {
+ return snapKeysIteratedLast.value();
+ }
+
+ public long getSnapKeysNotReclaimableLast() {
+ return snapKeysNotReclaimableLast.value();
+ }
+
@VisibleForTesting
public void resetDirectoryMetrics() {
numDirsPurged.set(0);
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
index 5a6bb427f4d..10c29b960ea 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java
@@ -744,6 +744,9 @@ public PendingKeysDeletion getPendingDeletionKeys(
List<BlockGroup> keyBlocksList = Lists.newArrayList();
long serializedSize = 0;
Map<String, RepeatedOmKeyInfo> keysToModify = new HashMap<>();
+ Map<String, Long> keyBlockReplicatedSize = new HashMap<>();
+ int notReclaimableKeyCount = 0;
+
// Bucket prefix would be empty if volume is empty i.e. either null or "".
Optional<String> bucketPrefix = getBucketPrefix(volume, bucket, false);
try (TableIterator<String, ? extends KeyValue<String, RepeatedOmKeyInfo>>
@@ -785,6 +788,7 @@ public PendingKeysDeletion getPendingDeletionKeys(
}
break;
}
+ keyBlockReplicatedSize.put(keyBlocks.getGroupID(),
info.getReplicatedSize());
blockGroupList.add(keyBlocks);
currentCount++;
} else {
@@ -803,10 +807,11 @@ public PendingKeysDeletion getPendingDeletionKeys(
keysToModify.put(kv.getKey(), notReclaimableKeyInfo);
}
keyBlocksList.addAll(blockGroupList);
+ notReclaimableKeyCount += notReclaimableKeyInfoList.size();
}
}
}
- return new PendingKeysDeletion(keyBlocksList, keysToModify);
+ return new PendingKeysDeletion(keyBlocksList, keysToModify,
keyBlockReplicatedSize, notReclaimableKeyCount);
}
private <V, R> List<KeyValue<String, R>> getTableEntries(String startKey,
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/PendingKeysDeletion.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/PendingKeysDeletion.java
index e1fbdfb107a..ab8c4dda167 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/PendingKeysDeletion.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/PendingKeysDeletion.java
@@ -37,11 +37,17 @@ public class PendingKeysDeletion {
private Map<String, RepeatedOmKeyInfo> keysToModify;
private List<BlockGroup> keyBlocksList;
+ private Map<String, Long> keyBlockReplicatedSize;
+ private int notReclaimableKeyCount;
public PendingKeysDeletion(List<BlockGroup> keyBlocksList,
- Map<String, RepeatedOmKeyInfo> keysToModify) {
+ Map<String, RepeatedOmKeyInfo> keysToModify,
+ Map<String, Long> keyBlockReplicatedSize,
+ int notReclaimableKeyCount) {
this.keysToModify = keysToModify;
this.keyBlocksList = keyBlocksList;
+ this.keyBlockReplicatedSize = keyBlockReplicatedSize;
+ this.notReclaimableKeyCount = notReclaimableKeyCount;
}
public Map<String, RepeatedOmKeyInfo> getKeysToModify() {
@@ -51,4 +57,12 @@ public Map<String, RepeatedOmKeyInfo> getKeysToModify() {
public List<BlockGroup> getKeyBlocksList() {
return keyBlocksList;
}
+
+ public Map<String, Long> getKeyBlockReplicatedSize() {
+ return keyBlockReplicatedSize;
+ }
+
+ public int getNotReclaimableKeyCount() {
+ return notReclaimableKeyCount;
+ }
}
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
index c9c1237c27e..a106d1f6b22 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/KeyDeletingService.java
@@ -91,6 +91,10 @@ public class KeyDeletingService extends
AbstractKeyDeletingService {
private final SnapshotChainManager snapshotChainManager;
private int ratisByteLimit;
private static final double RATIS_LIMIT_FACTOR = 0.9;
+ // Track metrics for current task execution
+ private long latestRunTimestamp = 0L;
+ private final DeletionStats aosDeletionStats = new DeletionStats();
+ private final DeletionStats snapshotDeletionStats = new DeletionStats();
public KeyDeletingService(OzoneManager ozoneManager,
ScmBlockLocationProtocol scmClient, long serviceInterval,
@@ -125,11 +129,12 @@ public AtomicLong getDeletedKeyCount() {
return deletedKeyCount;
}
- Pair<Integer, Boolean> processKeyDeletes(List<BlockGroup> keyBlocksList,
+ Pair<Pair<Integer, Long>, Boolean> processKeyDeletes(List<BlockGroup>
keyBlocksList,
Map<String, RepeatedOmKeyInfo> keysToModify, List<String> renameEntries,
- String snapTableKey, UUID expectedPreviousSnapshotId) throws
IOException, InterruptedException {
+ String snapTableKey, UUID expectedPreviousSnapshotId, Map<String, Long>
keyBlockReplicatedSize)
+ throws IOException, InterruptedException {
long startTime = Time.monotonicNow();
- Pair<Integer, Boolean> purgeResult = Pair.of(0, false);
+ Pair<Pair<Integer, Long>, Boolean> purgeResult = Pair.of(Pair.of(0, 0L),
false);
if (LOG.isDebugEnabled()) {
LOG.debug("Send {} key(s) to SCM: {}",
keyBlocksList.size(), keyBlocksList);
@@ -148,11 +153,11 @@ Pair<Integer, Boolean> processKeyDeletes(List<BlockGroup>
keyBlocksList,
if (blockDeletionResults != null) {
long purgeStartTime = Time.monotonicNow();
purgeResult = submitPurgeKeysRequest(blockDeletionResults,
- keysToModify, renameEntries, snapTableKey,
expectedPreviousSnapshotId);
+ keysToModify, renameEntries, snapTableKey,
expectedPreviousSnapshotId, keyBlockReplicatedSize);
int limit =
getOzoneManager().getConfiguration().getInt(OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK,
OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK_DEFAULT);
LOG.info("Blocks for {} (out of {}) keys are deleted from DB in {} ms.
Limit per task is {}.",
- purgeResult, blockDeletionResults.size(), Time.monotonicNow() -
purgeStartTime, limit);
+ purgeResult.getKey().getKey(), blockDeletionResults.size(),
Time.monotonicNow() - purgeStartTime, limit);
}
getPerfMetrics().setKeyDeletingServiceLatencyMs(Time.monotonicNow() -
startTime);
return purgeResult;
@@ -164,13 +169,15 @@ Pair<Integer, Boolean> processKeyDeletes(List<BlockGroup>
keyBlocksList,
* @param results DeleteBlockGroups returned by SCM.
* @param keysToModify Updated list of RepeatedOmKeyInfo
*/
- private Pair<Integer, Boolean>
submitPurgeKeysRequest(List<DeleteBlockGroupResult> results,
+ private Pair<Pair<Integer, Long>, Boolean>
submitPurgeKeysRequest(List<DeleteBlockGroupResult> results,
Map<String, RepeatedOmKeyInfo> keysToModify, List<String>
renameEntriesToBeDeleted,
- String snapTableKey, UUID expectedPreviousSnapshotId) throws
InterruptedException {
+ String snapTableKey, UUID expectedPreviousSnapshotId, Map<String, Long>
keyBlockReplicatedSize)
+ throws InterruptedException {
List<String> purgeKeys = new ArrayList<>();
// Put all keys to be purged in a list
int deletedCount = 0;
+ long deletedReplSize = 0;
Set<String> failedDeletedKeys = new HashSet<>();
boolean purgeSuccess = true;
for (DeleteBlockGroupResult result : results) {
@@ -190,6 +197,9 @@ private Pair<Integer, Boolean>
submitPurgeKeysRequest(List<DeleteBlockGroupResul
LOG.debug("Key {} set to be purged from OM DB", deletedKey);
}
}
+ if (keyBlockReplicatedSize != null) {
+ deletedReplSize += keyBlockReplicatedSize.getOrDefault(deletedKey,
0L);
+ }
deletedCount++;
} else {
// If the block deletion failed, then the deleted keys should also not
be modified.
@@ -254,14 +264,42 @@ private Pair<Integer, Boolean>
submitPurgeKeysRequest(List<DeleteBlockGroupResul
}
} catch (ServiceException e) {
LOG.error("PurgeKey request failed. Will retry at next run.", e);
- return Pair.of(0, false);
+ return Pair.of(Pair.of(0, 0L), false);
}
- return Pair.of(deletedCount, purgeSuccess);
+ return Pair.of(Pair.of(deletedCount, deletedReplSize), purgeSuccess);
+ }
+
+ /**
+ * Updates ServiceMetrics for the last run of the service.
+ */
+ @Override
+ protected void execTaskCompletion() {
+ getMetrics().updateIntervalCumulativeMetrics(
+ aosDeletionStats.reclaimedKeyCount.get() +
snapshotDeletionStats.reclaimedKeyCount.get(),
+ aosDeletionStats.reclaimedKeySize.get() +
snapshotDeletionStats.reclaimedKeySize.get());
+
getMetrics().updateAosLastRunMetrics(aosDeletionStats.reclaimedKeyCount.get(),
+ aosDeletionStats.reclaimedKeySize.get(),
aosDeletionStats.iteratedKeyCount.get(),
+ aosDeletionStats.notReclaimableKeyCount.get());
+
getMetrics().updateSnapLastRunMetrics(snapshotDeletionStats.reclaimedKeyCount.get(),
+ snapshotDeletionStats.reclaimedKeySize.get(),
snapshotDeletionStats.iteratedKeyCount.get(),
+ snapshotDeletionStats.notReclaimableKeyCount.get());
+ getMetrics().setKdsLastRunTimestamp(latestRunTimestamp);
+ }
+
+ /**
+ * Resets ServiceMetrics for the current run of the service.
+ */
+ private void resetMetrics() {
+ aosDeletionStats.reset();
+ snapshotDeletionStats.reset();
+ latestRunTimestamp = System.currentTimeMillis();
+ getMetrics().setKdsCurRunTimestamp(latestRunTimestamp);
}
@Override
public BackgroundTaskQueue getTasks() {
+ resetMetrics();
BackgroundTaskQueue queue = new BackgroundTaskQueue();
queue.add(new KeyDeletingTask(null));
if (deepCleanSnapshots) {
@@ -320,7 +358,6 @@ private
OzoneManagerProtocolProtos.SetSnapshotPropertyRequest getSetSnapshotRequ
}
/**
- *
* @param currentSnapshotInfo if null, deleted directories in AOS should
be processed.
* @param keyManager KeyManager of the underlying store.
*/
@@ -381,14 +418,21 @@ private void processDeletedKeysForStore(SnapshotInfo
currentSnapshotInfo, KeyMan
// Validating if the previous snapshot is still the same before
purging the blocks.
SnapshotUtils.validatePreviousSnapshotId(currentSnapshotInfo,
snapshotChainManager,
expectedPreviousSnapshotId);
- Pair<Integer, Boolean> purgeResult =
processKeyDeletes(keyBlocksList, pendingKeysDeletion.getKeysToModify(),
- renamedTableEntries, snapshotTableKey,
expectedPreviousSnapshotId);
- remainNum -= purgeResult.getKey();
+ Pair<Pair<Integer, Long>, Boolean> purgeResult =
processKeyDeletes(keyBlocksList,
+ pendingKeysDeletion.getKeysToModify(), renamedTableEntries,
snapshotTableKey,
+ expectedPreviousSnapshotId,
pendingKeysDeletion.getKeyBlockReplicatedSize());
+ remainNum -= purgeResult.getKey().getKey();
successStatus = purgeResult.getValue();
getMetrics().incrNumKeysProcessed(keyBlocksList.size());
- getMetrics().incrNumKeysSentForPurge(purgeResult.getKey());
+
getMetrics().incrNumKeysSentForPurge(purgeResult.getKey().getKey());
+
+ DeletionStats statsToUpdate = currentSnapshotInfo == null ?
aosDeletionStats : snapshotDeletionStats;
+ statsToUpdate.updateDeletionStats(purgeResult.getKey().getKey(),
purgeResult.getKey().getValue(),
+ keyBlocksList.size() +
pendingKeysDeletion.getNotReclaimableKeyCount(),
+ pendingKeysDeletion.getNotReclaimableKeyCount()
+ );
if (successStatus) {
- deletedKeyCount.addAndGet(purgeResult.getKey());
+ deletedKeyCount.addAndGet(purgeResult.getKey().getKey());
}
}
@@ -480,4 +524,26 @@ public BackgroundTaskResult call() {
return EmptyTaskResult.newResult();
}
}
+
+ private static class DeletionStats {
+ private final AtomicLong reclaimedKeyCount = new AtomicLong(0L);
+ private final AtomicLong reclaimedKeySize = new AtomicLong(0L);
+ private final AtomicLong iteratedKeyCount = new AtomicLong(0L);
+ private final AtomicLong notReclaimableKeyCount = new AtomicLong(0L);
+
+ private void updateDeletionStats(long reclaimedKeys, long reclaimedSize,
+ long iteratedKeys, long
notReclaimableKeys) {
+ this.reclaimedKeyCount.addAndGet(reclaimedKeys);
+ this.reclaimedKeySize.addAndGet(reclaimedSize);
+ this.iteratedKeyCount.addAndGet(iteratedKeys);
+ this.notReclaimableKeyCount.addAndGet(notReclaimableKeys);
+ }
+
+ private void reset() {
+ reclaimedKeyCount.set(0L);
+ reclaimedKeySize.set(0L);
+ iteratedKeyCount.set(0L);
+ notReclaimableKeyCount.set(0L);
+ }
+ }
}
diff --git
a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/om-overview.html
b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/om-overview.html
index 56127481989..c90ec67ce9d 100644
---
a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/om-overview.html
+++
b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/om-overview.html
@@ -90,4 +90,63 @@ <h2>Meta-Data Volume Information</h2>
<td>{{$ctrl.overview.jmx.RocksDbDirectory}}</td>
</tr>
</tbody>
-</table>
\ No newline at end of file
+</table>
+
+<!-- Only display Deletion Progress on OM leader -->
+<div ng-show="$ctrl.role.Role.trim() === 'LEADER'">
+ <h2>Deletion Progress [since {{$ctrl.overview.jmx.MetricsResetTimeStamp *
1000 | date:'yyyy-MM-dd HH:mm:ss'}}]
+ •
+ <b>Size Reclaimed:</b>
{{$ctrl.formatBytes($ctrl.overview.jmx.ReclaimedSizeInInterval)}}
+ •
+ <b>Keys Reclaimed:</b> {{$ctrl.overview.jmx.KeysReclaimedInInterval}}
+ </h2>
+ <div class="mt-4">
+ <div class="mt-3">
+ <div class="col-md-12">
+ <button class="btn btn-sm btn-link p-0"
ng-click="lastRunDetailsVisible = !lastRunDetailsVisible">
+ {{ lastRunDetailsVisible ? 'Last Run Details(-)' : 'Last Run
Details(+)' }}
+ </button>
+ </div>
+ <div ng-show="lastRunDetailsVisible">
+ <div class="mt-3">
+ <div class="row mb-2"
ng-if="$ctrl.overview.jmx.KdsCurRunTimestamp">
+ <div class="col-md-3"><b>Current Run Started:</b></div>
+ <div
class="col-md-9">{{$ctrl.convertMsToTime($ctrl.Date.now() -
$ctrl.overview.jmx.KdsCurRunTimestamp)}} ago</div>
+ </div>
+ <div class="row mb-2"
ng-if="$ctrl.overview.jmx.KdsLastRunTimestamp">
+ <div class="col-md-3"><b>Last Run:</b></div>
+ <div
class="col-md-9">{{$ctrl.convertMsToTime($ctrl.Date.now() -
$ctrl.overview.jmx.KdsLastRunTimestamp)}} ago</div>
+ </div>
+ </div>
+ <div style="margin-bottom: 2px;"></div>
+ <table class="table table-sm table-bordered mt-2">
+ <thead>
+ <tr>
+ <th>Store</th>
+ <th>Reclaimed Size</th>
+ <th>#Reclaimed Keys</th>
+ <th>#Iterated Keys</th>
+ <th>#NotReclaimable Keys (Referred by Snapshots)</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>Active Object Store</td>
+
<td>{{$ctrl.formatBytes($ctrl.overview.jmx.AosReclaimedSizeLast)}}</td>
+ <td>{{$ctrl.overview.jmx.AosKeysReclaimedLast ||
0}}</td>
+ <td>{{$ctrl.overview.jmx.AosKeysIteratedLast ||
0}}</td>
+ <td>{{$ctrl.overview.jmx.AosKeysNotReclaimableLast
|| 0}}</td>
+ </tr>
+ <tr>
+ <td>Snapshots</td>
+
<td>{{$ctrl.formatBytes($ctrl.overview.jmx.SnapReclaimedSizeLast)}}</td>
+ <td>{{$ctrl.overview.jmx.SnapKeysReclaimedLast ||
0}}</td>
+ <td>{{$ctrl.overview.jmx.SnapKeysIteratedLast ||
0}}</td>
+ <td>{{$ctrl.overview.jmx.SnapKeysNotReclaimableLast
|| 0}}</td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ </div>
+ </div>
+</div>
diff --git
a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js
b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js
index bd402b1b43a..85746a0889f 100644
---
a/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js
+++
b/hadoop-ozone/ozone-manager/src/main/resources/webapps/ozoneManager/ozoneManager.js
@@ -115,6 +115,28 @@
},
controller: function ($http) {
var ctrl = this;
+ ctrl.Date = Date;
+
+ ctrl.formatBytes = function(bytes, decimals) {
+ if(bytes == 0) return '0 Bytes';
+ var k = 1024, // or 1024 for binary
+ dm = decimals + 1 || 3,
+ sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB',
'YB'],
+ i = Math.floor(Math.log(bytes) / Math.log(k));
+ return parseFloat((bytes / Math.pow(k, i)).toFixed(dm)) + ' ' +
sizes[i];
+ }
+
+ ctrl.convertMsToTime = function(ms) {
+ let seconds = (ms / 1000).toFixed(1);
+ let minutes = (ms / (1000 * 60)).toFixed(1);
+ let hours = (ms / (1000 * 60 * 60)).toFixed(1);
+ let days = (ms / (1000 * 60 * 60 * 24)).toFixed(1);
+ if (seconds < 60) return seconds + " Seconds";
+ else if (minutes < 60) return minutes + " Minutes";
+ else if (hours < 24) return hours + " Hours";
+ else return days + " Days"
+ };
+
$http.get("jmx?qry=Ratis:service=RaftServer,group=*,id=*")
.then(function (result) {
ctrl.role = result.data.beans[0];
@@ -129,19 +151,18 @@
.then(function (result) {
ctrl.elapsedTime = result.data.beans[0];
if(ctrl.elapsedTime.Value != -1){
- ctrl.elapsedTime.Value =
convertMsToTime(ctrl.elapsedTime.Value);
+ ctrl.elapsedTime.Value =
ctrl.convertMsToTime(ctrl.elapsedTime.Value);
+ }
+ });
+
+ // Add JMX query to fetch DeletingServiceMetrics data
+
$http.get("jmx?qry=Hadoop:service=OzoneManager,name=DeletingServiceMetrics")
+ .then(function (result) {
+ if (result.data.beans && result.data.beans.length > 0) {
+ // Merge the DeletingServiceMetrics data into the
existing overview.jmx object
+ ctrl.overview.jmx = {...ctrl.overview.jmx,
...result.data.beans[0]};
}
});
}
});
- function convertMsToTime(ms) {
- let seconds = (ms / 1000).toFixed(1);
- let minutes = (ms / (1000 * 60)).toFixed(1);
- let hours = (ms / (1000 * 60 * 60)).toFixed(1);
- let days = (ms / (1000 * 60 * 60 * 24)).toFixed(1);
- if (seconds < 60) return seconds + " Seconds";
- else if (minutes < 60) return minutes + " Minutes";
- else if (hours < 24) return hours + " Hours";
- else return days + " Days"
- }
})();
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java
index 134e9b5f731..fe27f72d93f 100644
---
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java
@@ -72,6 +72,7 @@
import org.apache.hadoop.hdds.utils.db.DBConfigFromFile;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.common.BlockGroup;
+import org.apache.hadoop.ozone.om.DeletingServiceMetrics;
import org.apache.hadoop.ozone.om.KeyManager;
import org.apache.hadoop.ozone.om.KeyManagerImpl;
import org.apache.hadoop.ozone.om.OMConfigKeys;
@@ -135,6 +136,7 @@ class TestKeyDeletingService extends OzoneTestBase {
private static final Logger LOG =
LoggerFactory.getLogger(TestKeyDeletingService.class);
private static final AtomicInteger OBJECT_COUNTER = new AtomicInteger();
+ private static final long DATA_SIZE = 1000L;
private OzoneConfiguration conf;
private OzoneManagerProtocol writeClient;
@@ -145,6 +147,7 @@ class TestKeyDeletingService extends OzoneTestBase {
private DirectoryDeletingService directoryDeletingService;
private ScmBlockLocationTestingClient scmBlockTestingClient;
private int ratisLimit;
+ private DeletingServiceMetrics metrics;
@BeforeAll
void setup() {
@@ -152,15 +155,19 @@ void setup() {
}
private void createConfig(File testDir) {
+ createConfig(testDir, 100);
+ }
+
+ private void createConfig(File testDir, int delintervalMs) {
conf = new OzoneConfiguration();
System.setProperty(DBConfigFromFile.CONFIG_DIR, "/");
ServerUtils.setOzoneMetaDirPath(conf, testDir.toString());
conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL,
- 100, TimeUnit.MILLISECONDS);
+ delintervalMs, TimeUnit.MILLISECONDS);
conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL,
- 100, TimeUnit.MILLISECONDS);
+ delintervalMs, TimeUnit.MILLISECONDS);
conf.setTimeDuration(OZONE_DIR_DELETING_SERVICE_INTERVAL,
- 100, TimeUnit.MILLISECONDS);
+ delintervalMs, TimeUnit.MILLISECONDS);
conf.setTimeDuration(OZONE_SNAPSHOT_SST_FILTERING_SERVICE_INTERVAL,
1, TimeUnit.SECONDS);
conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL,
@@ -183,6 +190,7 @@ private void createSubject() throws Exception {
OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT_DEFAULT,
StorageUnit.BYTES);
ratisLimit = (int) (limit * 0.9);
+ metrics = keyDeletingService.getMetrics();
}
/**
@@ -814,7 +822,7 @@ public void testFailingModifiedKeyPurge() throws
IOException, InterruptedExcepti
.build();
Map<String, RepeatedOmKeyInfo> keysToModify =
Collections.singletonMap("key1",
new RepeatedOmKeyInfo(Collections.singletonList(omKeyInfo)));
- keyDeletingService.processKeyDeletes(blockGroups, keysToModify,
renameEntriesToBeDeleted, null, null);
+ keyDeletingService.processKeyDeletes(blockGroups, keysToModify,
renameEntriesToBeDeleted, null, null, null);
assertTrue(purgeRequest.get().getPurgeKeysRequest().getKeysToUpdateList().isEmpty());
assertEquals(renameEntriesToBeDeleted,
purgeRequest.get().getPurgeKeysRequest().getRenamedKeysList());
}
@@ -889,6 +897,155 @@ void checkDeletionForPartiallyCommitKey() throws
Exception {
}
}
+ /**
+ * Tests Metrics.
+ */
+ @Nested
+ @TestInstance(TestInstance.Lifecycle.PER_CLASS)
+ class Metrics {
+
+ @BeforeAll
+ void setup(@TempDir File testDir) throws Exception {
+ scmBlockTestingClient = new ScmBlockLocationTestingClient(null, null, 0);
+ createConfig(testDir, 3600_000);
+ createSubject();
+ }
+
+ @AfterAll
+ void cleanup() {
+ if (om.stop()) {
+ om.join();
+ }
+ }
+
+ /*
+ * Suspend DeletingService so that keys are not reclaimed.
+ * Create 10 keys
+ * Create Snap1
+ * Create 5 keys
+ * Delete all 15 keys
+ * Create snap2
+ * Create 5 keys
+ * Delete 5 keys
+ * Resume DeletingService
+ * wait for AOS deleted keys to be reclaimed -> Deleted 5. Not reclaimed 0.
+ * wait for snap2 to be deep cleaned -> Deleted 5. Not reclaimed 10.
+ * delete snap1 -> Wait for snap2 to be deep cleaned. -> Deleted 10. Not
reclaimed 0.
+ */
+ @Test
+ void testLastRunAnd24hMetrics() throws Exception {
+ // Suspend DeletingService
+ keyDeletingService.suspend();
+ directoryDeletingService.suspend();
+
+ final String volumeName = getTestName();
+ final String bucketName = uniqueObjectName("bucket");
+ createVolumeAndBucket(volumeName, bucketName, false);
+
+ // Create 10 keys
+ List<OmKeyArgs> createdKeys = new ArrayList<>();
+ for (int i = 1; i <= 10; i++) {
+ OmKeyArgs args = createAndCommitKey(volumeName, bucketName,
uniqueObjectName("key"), 1);
+ createdKeys.add(args);
+ }
+
+ // Create Snap1
+ String snap1 = uniqueObjectName("snap");
+ writeClient.createSnapshot(volumeName, bucketName, snap1);
+
+ // Create 5 Keys
+ for (int i = 11; i <= 15; i++) {
+ OmKeyArgs args = createAndCommitKey(volumeName, bucketName,
uniqueObjectName("key"), 1);
+ createdKeys.add(args);
+ }
+
+ // Delete all 15 keys.
+ for (int i = 0; i < 15; i++) {
+ writeClient.deleteKey(createdKeys.get(i));
+ }
+
+ // Create Snap2, traps all the deleted keys.
+ String snap2 = uniqueObjectName("snap");
+ writeClient.createSnapshot(volumeName, bucketName, snap2);
+
+ // Create and delete 5 more keys.
+ for (int i = 16; i <= 20; i++) {
+ OmKeyArgs args = createAndCommitKey(volumeName, bucketName,
uniqueObjectName("key"), 1);
+ createdKeys.add(args);
+ }
+ for (int i = 15; i < 20; i++) {
+ writeClient.deleteKey(createdKeys.get(i));
+ }
+
+ // Wait for snap2 to be flushed.
+ GenericTestUtils.waitFor(
+ () -> {
+ try {
+ SnapshotInfo snapshotInfo =
writeClient.getSnapshotInfo(volumeName, bucketName, snap2);
+ return
OmSnapshotManager.areSnapshotChangesFlushedToDB(metadataManager, snapshotInfo);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }, 1000, 100000);
+
+ // Resume DeletingService
+ keyDeletingService.resume();
+ directoryDeletingService.resume();
+
+ // wait for AOS deleted keys to be reclaimed and
+ // snap2 to be deep cleaned.
+ directoryDeletingService.runPeriodicalTaskNow();
+ keyDeletingService.runPeriodicalTaskNow();
+ GenericTestUtils.waitFor(() -> getDeletedKeyCount() == 10, 100, 10000);
+ // Verify last run AOS deletion metrics.
+ assertEquals(5, metrics.getAosKeysReclaimedLast());
+ assertEquals(5 * DATA_SIZE * 3, metrics.getAosReclaimedSizeLast());
+ assertEquals(5, metrics.getAosKeysIteratedLast());
+ assertEquals(0, metrics.getAosKeysNotReclaimableLast());
+ // Verify last run Snapshot deletion metrics.
+ assertEquals(5, metrics.getSnapKeysReclaimedLast());
+ assertEquals(5 * DATA_SIZE * 3, metrics.getSnapReclaimedSizeLast());
+ assertEquals(15, metrics.getSnapKeysIteratedLast());
+ assertEquals(10, metrics.getSnapKeysNotReclaimableLast());
+ // Verify 24h deletion metrics.
+ assertEquals(10, metrics.getKeysReclaimedInInterval());
+ assertEquals(10 * DATA_SIZE * 3, metrics.getReclaimedSizeInInterval());
+
+ // Delete snap1, which also sets snap2 to be deep cleaned.
+ writeClient.deleteSnapshot(volumeName, bucketName, snap1);
+ keyManager.getSnapshotDeletingService().runPeriodicalTaskNow();
+ // Wait for changes to the snap2 to be flushed.
+ GenericTestUtils.waitFor(
+ () -> {
+ try {
+ SnapshotInfo snapshotInfo =
writeClient.getSnapshotInfo(volumeName, bucketName, snap2);
+ return
OmSnapshotManager.areSnapshotChangesFlushedToDB(metadataManager, snapshotInfo);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }, 1000, 100000);
+
+ // wait for snap2 to be deep cleaned.
+ directoryDeletingService.runPeriodicalTaskNow();
+ keyDeletingService.runPeriodicalTaskNow();
+ GenericTestUtils.waitFor(() -> getDeletedKeyCount() == 20, 100, 10000);
+
+ // Verify last run AOS deletion metrics.
+ assertEquals(0, metrics.getAosKeysReclaimedLast());
+ assertEquals(0, metrics.getAosReclaimedSizeLast());
+ assertEquals(0, metrics.getAosKeysIteratedLast());
+ assertEquals(0, metrics.getAosKeysNotReclaimableLast());
+ // Verify last run Snapshot deletion metrics.
+ assertEquals(10, metrics.getSnapKeysReclaimedLast());
+ assertEquals(10 * DATA_SIZE * 3, metrics.getSnapReclaimedSizeLast());
+ assertEquals(10, metrics.getSnapKeysIteratedLast());
+ assertEquals(0, metrics.getSnapKeysNotReclaimableLast());
+ // Verify 24h deletion metrics.
+ assertEquals(20, metrics.getKeysReclaimedInInterval());
+ assertEquals(20 * DATA_SIZE * 3, metrics.getReclaimedSizeInInterval());
+ }
+ }
+
private void createAndDeleteKeys(int keyCount, int numBlocks) throws
IOException {
for (int x = 0; x < keyCount; x++) {
final String volumeName = getTestName();
@@ -1007,7 +1164,7 @@ private OmKeyArgs createAndCommitKey(String volumeName,
.setKeyName(keyName)
.setAcls(Collections.emptyList())
.setReplicationConfig(RatisReplicationConfig.getInstance(THREE))
- .setDataSize(1000L)
+ .setDataSize(DATA_SIZE)
.setLocationInfoList(new ArrayList<>())
.setOwnerName("user" + RandomStringUtils.secure().nextNumeric(5))
.build();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]