This is an automated email from the ASF dual-hosted git repository.

tejaskriya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new a5e1cd0a692 HDDS-13533. Show the summary of replicas verify checks (#8898)
a5e1cd0a692 is described below

commit a5e1cd0a6923d965441b1e69be9744099e045e56
Author: Sarveksha Yeshavantha Raju <[email protected]>
AuthorDate: Wed Sep 3 16:49:41 2025 +0530

    HDDS-13533. Show the summary of replicas verify checks (#8898)
---
 .../smoketest/debug/ozone-debug-keywords.robot     |   3 +-
 .../ozone/debug/replicas/ReplicasVerify.java       | 172 +++++++++++++++++++--
 2 files changed, 163 insertions(+), 12 deletions(-)

diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot
index a6bed4524d2..d75bdd20607 100644
--- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot
+++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot
@@ -36,7 +36,8 @@ Execute replicas verify container state debug tool
 
 Parse replicas verify JSON output
     [Arguments]    ${output}
-    ${json} =      Evaluate    json.loads('''${output}''')    json
+    ${json_split} =  Evaluate  '''${output}'''.split('***')[0].strip()
+    ${json} =      Evaluate  json.loads('''${json_split}''')  json
     [Return]       ${json}
 
 Check to Verify Replicas
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java
index aeaa689fcde..4dc810be6b5 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/debug/replicas/ReplicasVerify.java
@@ -17,14 +17,25 @@
 
 package org.apache.hadoop.ozone.debug.replicas;
 
+import static org.apache.hadoop.ozone.conf.OzoneServiceConfig.DEFAULT_SHUTDOWN_HOOK_PRIORITY;
+
 import com.fasterxml.jackson.databind.node.ArrayNode;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.io.IOException;
+import java.io.PrintStream;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Supplier;
+import org.apache.commons.lang3.time.DurationFormatUtils;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.scm.cli.ScmOption;
@@ -41,6 +52,7 @@
 import org.apache.hadoop.ozone.shell.OzoneAddress;
 import org.apache.hadoop.ozone.shell.Shell;
 import org.apache.hadoop.ozone.shell.ShellReplicationOptions;
+import org.apache.hadoop.ozone.util.ShutdownHookManager;
 import picocli.CommandLine;
 
 /**
@@ -79,22 +91,80 @@ public class ReplicasVerify extends Handler {
 
   private List<ReplicaVerifier> replicaVerifiers;
 
+  private static final String DURATION_FORMAT = "HH:mm:ss,SSS";
+  private long startTime;
+  private long endTime;
+  private String verificationScope;
+  private final List<String> verificationTypes = new ArrayList<>();
+  private final AtomicInteger volumesProcessed = new AtomicInteger(0);
+  private final AtomicInteger bucketsProcessed = new AtomicInteger(0);
+  private final AtomicInteger keysProcessed = new AtomicInteger(0);
+  private final AtomicInteger keysPassed = new AtomicInteger(0);
+  private final AtomicInteger keysFailed = new AtomicInteger(0);
+  private final Map<String, AtomicInteger> failuresByType = new ConcurrentHashMap<>();
+  private volatile Throwable exception;
+
+  private void addVerifier(boolean condition, Supplier<ReplicaVerifier> verifierSupplier) {
+    if (condition) {
+      ReplicaVerifier verifier = verifierSupplier.get();
+      replicaVerifiers.add(verifier);
+      String verifierType = verifier.getType();
+      verificationTypes.add(verifierType);
+      failuresByType.put(verifierType, new AtomicInteger(0));
+    }
+  }
+
   @Override
   protected void execute(OzoneClient client, OzoneAddress address) throws IOException {
+    startTime = System.nanoTime();
+
+    if (!address.getKeyName().isEmpty()) {
+      verificationScope = "Key";
+    } else if (!address.getBucketName().isEmpty()) {
+      verificationScope = "Bucket";
+    } else if (!address.getVolumeName().isEmpty()) {
+      verificationScope = "Volume";
+    } else {
+      verificationScope = "All Volumes";
+    }
+
     replicaVerifiers = new ArrayList<>();
 
-    if (verification.doExecuteChecksums) {
-      replicaVerifiers.add(new ChecksumVerifier(getConf()));
-    }
+    addVerifier(verification.doExecuteChecksums, () -> {
+      try {
+        return new ChecksumVerifier(getConf());
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    });
 
-    if (verification.doExecuteBlockExistence) {
-      replicaVerifiers.add(new BlockExistenceVerifier(getConf()));
-    }
-    if (verification.doExecuteReplicaState) {
-      replicaVerifiers.add(new ContainerStateVerifier(getConf(), containerCacheSize));
-    }
+    addVerifier(verification.doExecuteBlockExistence, () -> {
+      try {
+        return new BlockExistenceVerifier(getConf());
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    });
 
-    findCandidateKeys(client, address);
+    addVerifier(verification.doExecuteReplicaState, () -> {
+      try {
+        return new ContainerStateVerifier(getConf(), containerCacheSize);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    });
+
+    // Add shutdown hook to ensure summary is printed even if interrupted
+    addShutdownHook();
+
+    try {
+      findCandidateKeys(client, address);
+    } catch (Exception e) {
+      exception = e;
+      throw e;
+    } finally {
+      endTime = System.nanoTime();
+    }
   }
 
   @Override
@@ -133,6 +203,7 @@ void findCandidateKeys(OzoneClient ozoneClient, OzoneAddress address) throws IOE
 
   void checkVolume(OzoneClient ozoneClient, OzoneVolume volume, ArrayNode keysArray, AtomicBoolean allKeysPassed)
       throws IOException {
+    volumesProcessed.incrementAndGet();
     for (Iterator<? extends OzoneBucket> it = volume.listBuckets(null); it.hasNext();) {
       OzoneBucket bucket = it.next();
       checkBucket(ozoneClient, bucket, keysArray, allKeysPassed);
@@ -141,6 +212,7 @@ void checkVolume(OzoneClient ozoneClient, OzoneVolume volume, ArrayNode keysArra
 
   void checkBucket(OzoneClient ozoneClient, OzoneBucket bucket, ArrayNode keysArray, AtomicBoolean allKeysPassed)
       throws IOException {
+    bucketsProcessed.incrementAndGet();
     for (Iterator<? extends OzoneKey> it = bucket.listKeys(null); it.hasNext();) {
       OzoneKey key = it.next();
       // TODO: Remove this check once HDDS-12094 is fixed
@@ -152,6 +224,7 @@ void checkBucket(OzoneClient ozoneClient, OzoneBucket bucket, ArrayNode keysArra
 
   void processKey(OzoneClient ozoneClient, String volumeName, String bucketName, String keyName,
       ArrayNode keysArray, AtomicBoolean allKeysPassed) throws IOException {
+    keysProcessed.incrementAndGet();
     OmKeyInfo keyInfo = ozoneClient.getProxy().getKeyInfo(
         volumeName, bucketName, keyName, false);
 
@@ -167,6 +240,7 @@ void processKey(OzoneClient ozoneClient, String volumeName, String bucketName, S
 
     ArrayNode blocksArray = keyNode.putArray("blocks");
     boolean keyPass = true;
+    Set<String> failedVerificationTypes = new HashSet<>();
 
     for (OmKeyLocationInfo keyLocation : keyInfo.getLatestVersionLocations().getBlocksLatestVersionOnly()) {
       long containerID = keyLocation.getContainerID();
@@ -205,6 +279,7 @@ void processKey(OzoneClient ozoneClient, String volumeName, String bucketName, S
 
           if (!result.passed()) {
             replicaPass = false;
+            failedVerificationTypes.add(verifier.getType());
           }
         }
 
@@ -219,8 +294,15 @@ void processKey(OzoneClient ozoneClient, String volumeName, String bucketName, S
     }
 
     keyNode.put("pass", keyPass);
-    if (!keyPass) {
+    if (keyPass) {
+      keysPassed.incrementAndGet();
+    } else {
+      keysFailed.incrementAndGet();
       allKeysPassed.set(false);
+      failedVerificationTypes.forEach(failedType -> failuresByType
+          .computeIfAbsent(failedType, k -> new AtomicInteger(0))
+          .incrementAndGet()
+      );
     }
 
     if (!keyPass || allResults) {
@@ -228,6 +310,74 @@ void processKey(OzoneClient ozoneClient, String volumeName, String bucketName, S
     }
   }
 
+  /**
+   * Adds ShutdownHook to print summary statistics.
+   */
+  private void addShutdownHook() {
+    ShutdownHookManager.get().addShutdownHook(() -> {
+      if (endTime == 0) {
+        endTime = System.nanoTime();
+      }
+      printSummary(System.err);
+    }, DEFAULT_SHUTDOWN_HOOK_PRIORITY);
+  }
+
+  /**
+   * Prints summary of replica verification run.
+   *
+   * @param out PrintStream
+   */
+  void printSummary(PrintStream out) {
+    if (endTime == 0) {
+      endTime = System.nanoTime();
+    }
+
+    long execTimeNanos = endTime - startTime;
+    String execTime = DurationFormatUtils.formatDuration(TimeUnit.NANOSECONDS.toMillis(execTimeNanos), DURATION_FORMAT);
+
+    long totalKeysProcessed = keysProcessed.get();
+    long totalKeysPassed = keysPassed.get();
+    long totalKeysFailed = keysFailed.get();
+
+    out.println();
+    out.println("***************************************************");
+    out.println("REPLICA VERIFICATION SUMMARY");
+    out.println("***************************************************");
+    out.println("Status: " + (exception != null ? "Failed" :
+        (totalKeysFailed == 0 ? "Success" : "Completed with failures")));
+    out.println("Verification Scope: " + verificationScope);
+    out.println("Verification Types: " + String.join(", ", verificationTypes));
+    out.println("URI: " + uri);
+    out.println();
+    out.println("Number of Volumes processed: " + volumesProcessed.get());
+    out.println("Number of Buckets processed: " + bucketsProcessed.get());
+    out.println("Number of Keys processed: " + totalKeysProcessed);
+    out.println();
+    out.println("Keys passed verification: " + totalKeysPassed);
+    out.println("Keys failed verification: " + totalKeysFailed);
+
+    if (!failuresByType.isEmpty() && totalKeysFailed > 0) {
+      out.println();
+      for (String verificationType : verificationTypes) {
+        long typeFailures = failuresByType.get(verificationType).get();
+        if (typeFailures > 0) {
+          out.println("Keys failed " + verificationType + " verification: " + typeFailures);
+        }
+      }
+      out.println("Note: A key may fail multiple verification types, so total may exceed overall failures.");
+    }
+
+    out.println();
+    out.println("Total Execution time: " + execTime);
+
+    if (exception != null) {
+      out.println();
+      out.println("Exception: " + exception.getClass().getSimpleName() + ": " + exception.getMessage());
+    }
+
+    out.println("***************************************************");
+  }
+
   /**
    * Check if the key should be processed based on replication config.
    * @param keyInfo the key to check


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to