This is an automated email from the ASF dual-hosted git repository.
erose pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new d8243691b9 HDDS-12990. Generate tree from metadata when it doesn't
exist during getContainerChecksumInfo call (#8881)
d8243691b9 is described below
commit d8243691b94a69087f7c761f3853727982358512
Author: Aswin Shakil Balasubramanian <[email protected]>
AuthorDate: Wed Aug 6 14:43:33 2025 -0700
HDDS-12990. Generate tree from metadata when it doesn't exist during
getContainerChecksumInfo call (#8881)
---
.../checksum/ContainerChecksumTreeManager.java | 3 +-
.../ozone/container/keyvalue/KeyValueHandler.java | 36 +++++++++++++++++-----
.../TestContainerCommandReconciliation.java | 36 +++++++++++++++++++---
3 files changed, 60 insertions(+), 15 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
index 0f14a17249..ce124754f0 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
@@ -30,7 +30,6 @@
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
-import java.nio.file.NoSuchFileException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
@@ -407,7 +406,7 @@ private SortedSet<Long>
getDeletedBlockIDs(ContainerProtos.ContainerChecksumInfo
public ByteString getContainerChecksumInfo(KeyValueContainerData data)
throws IOException {
File checksumFile = getContainerChecksumFile(data);
if (!checksumFile.exists()) {
- throw new NoSuchFileException("Checksum file does not exist for
container #" + data.getContainerID());
+ throw new FileNotFoundException("Checksum file does not exist for
container #" + data.getContainerID());
}
try (InputStream inStream = Files.newInputStream(checksumFile.toPath())) {
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 9963a2f7f6..bea4aa0af8 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -63,6 +63,7 @@
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.Striped;
import java.io.File;
+import java.io.FileNotFoundException;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
@@ -760,14 +761,33 @@ ContainerCommandResponseProto
handleGetContainerChecksumInfo(
try {
checksumTree = checksumManager.getContainerChecksumInfo(containerData);
} catch (IOException ex) {
- // For file not found or other inability to read the file, return an
error to the client.
- LOG.error("Error occurred when reading checksum file for container {}",
containerData.getContainerID(), ex);
- return ContainerCommandResponseProto.newBuilder()
- .setCmdType(request.getCmdType())
- .setTraceID(request.getTraceID())
- .setResult(IO_EXCEPTION)
- .setMessage(ex.getMessage())
- .build();
+ // Only build from metadata if the file doesn't exist
+ if (ex instanceof FileNotFoundException) {
+ try {
+ LOG.info("Checksum tree file not found for container {}. Building
merkle tree from container metadata.",
+ containerData.getContainerID());
+ ContainerProtos.ContainerChecksumInfo checksumInfo =
updateAndGetContainerChecksumFromMetadata(kvContainer);
+ checksumTree = checksumInfo.toByteString();
+ } catch (IOException metadataEx) {
+ LOG.error("Failed to build merkle tree from metadata for container
{}",
+ containerData.getContainerID(), metadataEx);
+ return ContainerCommandResponseProto.newBuilder()
+ .setCmdType(request.getCmdType())
+ .setTraceID(request.getTraceID())
+ .setResult(IO_EXCEPTION)
+ .setMessage("Failed to get or build merkle tree: " +
metadataEx.getMessage())
+ .build();
+ }
+ } else {
+ // For other inability to read the file, return an error to the client.
+ LOG.error("Error occurred when reading checksum file for container
{}", containerData.getContainerID(), ex);
+ return ContainerCommandResponseProto.newBuilder()
+ .setCmdType(request.getCmdType())
+ .setTraceID(request.getTraceID())
+ .setResult(IO_EXCEPTION)
+ .setMessage(ex.getMessage())
+ .build();
+ }
}
return getGetContainerMerkleTreeResponse(request, checksumTree);
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
index 17e7012818..359464dc67 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/checksum/TestContainerCommandReconciliation.java
@@ -58,6 +58,7 @@
import static
org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -101,6 +102,7 @@
import org.apache.hadoop.ozone.client.OzoneVolume;
import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
import org.apache.hadoop.ozone.container.TestHelper;
+import org.apache.hadoop.ozone.container.checksum.ContainerChecksumTreeManager;
import org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeWriter;
import org.apache.hadoop.ozone.container.checksum.DNContainerOperationClient;
import org.apache.hadoop.ozone.container.common.helpers.BlockData;
@@ -231,11 +233,10 @@ public void testGetChecksumInfoNonexistentReplica() {
}
/**
- * Tests reading the container checksum info file from a datanode where the
container exists, but the file has not
- * yet been created.
+ * Tests container checksum file creation if it doesn't exist during
getContainerChecksumInfo call.
*/
@Test
- public void testGetChecksumInfoNonexistentFile() throws Exception {
+ public void testMerkleTreeCreationDuringGetChecksumInfo() throws Exception {
String volume = UUID.randomUUID().toString();
String bucket = UUID.randomUUID().toString();
long containerID = writeDataAndGetContainer(true, volume, bucket);
@@ -245,14 +246,39 @@ public void testGetChecksumInfoNonexistentFile() throws
Exception {
.getContainerSet().getContainer(containerID);
File treeFile = getContainerChecksumFile(container.getContainerData());
// Closing the container should have generated the tree file.
+ ContainerProtos.ContainerChecksumInfo srcChecksumInfo =
ContainerChecksumTreeManager.readChecksumInfo(
+ container.getContainerData()).get();
assertTrue(treeFile.exists());
assertTrue(treeFile.delete());
+ ContainerProtos.ContainerChecksumInfo destChecksumInfo =
dnClient.getContainerChecksumInfo(
+ containerID, targetDN.getDatanodeDetails());
+ assertNotNull(destChecksumInfo);
+ assertTreesSortedAndMatch(srcChecksumInfo.getContainerMerkleTree(),
destChecksumInfo.getContainerMerkleTree());
+ }
+
+ /**
+ * Tests reading the container checksum info file from a datanode where
there's an IO error
+ * that's not related to file not found (e.g., permission error). Such
errors should not
+ * trigger fallback to building from metadata.
+ */
+ @Test
+ public void testGetChecksumInfoIOError() throws Exception {
+ String volume = UUID.randomUUID().toString();
+ String bucket = UUID.randomUUID().toString();
+ long containerID = writeDataAndGetContainer(true, volume, bucket);
+ // Pick a datanode and make its checksum file unreadable to simulate
permission error.
+ HddsDatanodeService targetDN = cluster.getHddsDatanodes().get(0);
+ Container<?> container = targetDN.getDatanodeStateMachine().getContainer()
+ .getContainerSet().getContainer(containerID);
+ File treeFile = getContainerChecksumFile(container.getContainerData());
+ assertTrue(treeFile.exists());
+ // Make the server unable to read the file (permission error, not file not
found).
+ assertTrue(treeFile.setReadable(false));
+
StorageContainerException ex =
assertThrows(StorageContainerException.class, () ->
dnClient.getContainerChecksumInfo(containerID,
targetDN.getDatanodeDetails()));
assertEquals(ContainerProtos.Result.IO_EXCEPTION, ex.getResult());
- assertTrue(ex.getMessage().contains("Checksum file does not exist"),
ex.getMessage() +
- " did not contain the expected string");
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]