This is an automated email from the ASF dual-hosted git repository.

erose pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new d8243691b9 HDDS-12990. Generate tree from metadata when it doesn't exist during getContainerChecksumInfo call (#8881)
d8243691b9 is described below

commit d8243691b94a69087f7c761f3853727982358512
Author: Aswin Shakil Balasubramanian <[email protected]>
AuthorDate: Wed Aug 6 14:43:33 2025 -0700

    HDDS-12990. Generate tree from metadata when it doesn't exist during getContainerChecksumInfo call (#8881)
---
 .../checksum/ContainerChecksumTreeManager.java     |  3 +-
 .../ozone/container/keyvalue/KeyValueHandler.java  | 36 +++++++++++++++++-----
 .../TestContainerCommandReconciliation.java        | 36 +++++++++++++++++++---
 3 files changed, 60 insertions(+), 15 deletions(-)

diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
index 0f14a17249..ce124754f0 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
@@ -30,7 +30,6 @@
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.nio.file.Files;
-import java.nio.file.NoSuchFileException;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
@@ -407,7 +406,7 @@ private SortedSet<Long> getDeletedBlockIDs(ContainerProtos.ContainerChecksumInfo
   public ByteString getContainerChecksumInfo(KeyValueContainerData data) 
throws IOException {
     File checksumFile = getContainerChecksumFile(data);
     if (!checksumFile.exists()) {
-      throw new NoSuchFileException("Checksum file does not exist for 
container #" + data.getContainerID());
+      throw new FileNotFoundException("Checksum file does not exist for 
container #" + data.getContainerID());
     }
 
     try (InputStream inStream = Files.newInputStream(checksumFile.toPath())) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 9963a2f7f6..bea4aa0af8 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -63,6 +63,7 @@
 import com.google.common.base.Preconditions;
 import com.google.common.util.concurrent.Striped;
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.FilenameFilter;
 import java.io.IOException;
 import java.io.InputStream;
@@ -760,14 +761,33 @@ ContainerCommandResponseProto handleGetContainerChecksumInfo(
     try {
       checksumTree = checksumManager.getContainerChecksumInfo(containerData);
     } catch (IOException ex) {
-      // For file not found or other inability to read the file, return an 
error to the client.
-      LOG.error("Error occurred when reading checksum file for container {}", 
containerData.getContainerID(), ex);
-      return ContainerCommandResponseProto.newBuilder()
-          .setCmdType(request.getCmdType())
-          .setTraceID(request.getTraceID())
-          .setResult(IO_EXCEPTION)
-          .setMessage(ex.getMessage())
-          .build();
+      // Only build from metadata if the file doesn't exist
+      if (ex instanceof FileNotFoundException) {
+        try {
+          LOG.info("Checksum tree file not found for container {}. Building 
merkle tree from container metadata.",
+              containerData.getContainerID());
+          ContainerProtos.ContainerChecksumInfo checksumInfo = 
updateAndGetContainerChecksumFromMetadata(kvContainer);
+          checksumTree = checksumInfo.toByteString();
+        } catch (IOException metadataEx) {
+          LOG.error("Failed to build merkle tree from metadata for container 
{}",
+              containerData.getContainerID(), metadataEx);
+          return ContainerCommandResponseProto.newBuilder()
+              .setCmdType(request.getCmdType())
+              .setTraceID(request.getTraceID())
+              .setResult(IO_EXCEPTION)
+              .setMessage("Failed to get or build merkle tree: " + 
metadataEx.getMessage())
+              .build();
+        }
+      } else {
+        // For other inability to read the file, return an error to the client.
+        LOG.error("Error occurred when reading checksum file for container 
{}", containerData.getContainerID(), ex);
+        return ContainerCommandResponseProto.newBuilder()
+            .setCmdType(request.getCmdType())
+            .setTraceID(request.getTraceID())
+            .setResult(IO_EXCEPTION)
+            .setMessage(ex.getMessage())
+            .build();
+      }
     }
 
     return getGetContainerMerkleTreeResponse(request, checksumTree);
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
index 17e7012818..359464dc67 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
@@ -58,6 +58,7 @@
 import static 
org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -101,6 +102,7 @@
 import org.apache.hadoop.ozone.client.OzoneVolume;
 import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
 import org.apache.hadoop.ozone.container.TestHelper;
+import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager;
 import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter;
 import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient;
 import org.apache.hadoop.ozone.container.common.helpers.BlockData;
@@ -231,11 +233,10 @@ public void testGetChecksumInfoNonexistentReplica() {
   }
 
   /**
-   * Tests reading the container checksum info file from a datanode where the 
container exists, but the file has not
-   * yet been created.
+   * Tests container checksum file creation if it doesn't exist during 
getContainerChecksumInfo call.
    */
   @Test
-  public void testGetChecksumInfoNonexistentFile() throws Exception {
+  public void testMerkleTreeCreationDuringGetChecksumInfo() throws Exception {
     String volume = UUID.randomUUID().toString();
     String bucket = UUID.randomUUID().toString();
     long containerID = writeDataAndGetContainer(true, volume, bucket);
@@ -245,14 +246,39 @@ public void testGetChecksumInfoNonexistentFile() throws Exception {
         .getContainerSet().getContainer(containerID);
     File treeFile = getContainerChecksumFile(container.getContainerData());
     // Closing the container should have generated the tree file.
+    ContainerProtos.ContainerChecksumInfo srcChecksumInfo = 
ContainerChecksumTreeManager.readChecksumInfo(
+        container.getContainerData()).get();
     assertTrue(treeFile.exists());
     assertTrue(treeFile.delete());
 
+    ContainerProtos.ContainerChecksumInfo destChecksumInfo = 
dnClient.getContainerChecksumInfo(
+        containerID, targetDN.getDatanodeDetails());
+    assertNotNull(destChecksumInfo);
+    assertTreesSortedAndMatch(srcChecksumInfo.getContainerMerkleTree(), 
destChecksumInfo.getContainerMerkleTree());
+  }
+
+  /**
+   * Tests reading the container checksum info file from a datanode where 
there's an IO error 
+   * that's not related to file not found (e.g., permission error). Such 
errors should not 
+   * trigger fallback to building from metadata.
+   */
+  @Test
+  public void testGetChecksumInfoIOError() throws Exception {
+    String volume = UUID.randomUUID().toString();
+    String bucket = UUID.randomUUID().toString();
+    long containerID = writeDataAndGetContainer(true, volume, bucket);
+    // Pick a datanode and make its checksum file unreadable to simulate 
permission error.
+    HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0);
+    Container<?> container = targetDN.getDatanodeStateMachine().getContainer()
+        .getContainerSet().getContainer(containerID);
+    File treeFile = getContainerChecksumFile(container.getContainerData());
+    assertTrue(treeFile.exists());
+    // Make the server unable to read the file (permission error, not file not 
found).
+    assertTrue(treeFile.setReadable(false));
+
     StorageContainerException ex = 
assertThrows(StorageContainerException.class, () ->
         dnClient.getContainerChecksumInfo(containerID, 
targetDN.getDatanodeDetails()));
     assertEquals(ContainerProtos.Result.IO_EXCEPTION, ex.getResult());
-    assertTrue(ex.getMessage().contains("Checksum file does not exist"), 
ex.getMessage() +
-        " did not contain the expected string");
   }
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to