This is an automated email from the ASF dual-hosted git repository.
aswinshakil pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 1c2c7f340c HDDS-13083. Handle cases where block deletion generates tree file before scanner (#8565)
1c2c7f340c is described below
commit 1c2c7f340cc4e595b3dbc94c5ece71d44e53a638
Author: Ethan Rose <[email protected]>
AuthorDate: Wed Jul 23 14:33:01 2025 -0400
HDDS-13083. Handle cases where block deletion generates tree file before scanner (#8565)
---
.../checksum/ContainerChecksumTreeManager.java | 83 ++++++++++------------
.../container/checksum/ContainerDiffReport.java | 10 ++-
.../ozone/container/common/impl/ContainerData.java | 14 +++-
.../ozone/container/keyvalue/KeyValueHandler.java | 28 +++-----
.../checksum/ContainerMerkleTreeTestUtils.java | 6 +-
.../checksum/TestContainerChecksumTreeManager.java | 37 +++++-----
.../common/TestKeyValueContainerData.java | 29 ++++++++
...stContainerReconciliationWithMockDatanodes.java | 12 ++--
.../keyvalue/TestKeyValueContainerCheck.java | 6 +-
.../container/keyvalue/TestKeyValueHandler.java | 8 ++-
.../TestKeyValueHandlerWithUnhealthyContainer.java | 4 +-
.../scm/cli/container/ReconcileSubcommand.java | 2 +-
...tBackgroundContainerDataScannerIntegration.java | 32 +++++----
.../TestContainerScannerIntegrationAbstract.java | 15 ++++
14 files changed, 171 insertions(+), 115 deletions(-)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
index b52d0667c7..8a1afc5a6b 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerChecksumTreeManager.java
@@ -40,6 +40,7 @@
import java.util.TreeSet;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
+import java.util.function.BiConsumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
@@ -47,7 +48,6 @@
import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.hdds.utils.SimpleStriped;
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
-import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration;
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
@@ -94,15 +94,7 @@ public ContainerProtos.ContainerChecksumInfo writeContainerDataTree(ContainerDat
Lock writeLock = getLock(containerID);
writeLock.lock();
try {
- ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = null;
- try {
- // If the file is not present, we will create the data for the first time. This happens under a write lock.
- checksumInfoBuilder = readBuilder(data).orElse(ContainerProtos.ContainerChecksumInfo.newBuilder());
- } catch (IOException ex) {
- LOG.error("Failed to read container checksum tree file for container {}. Creating a new instance.",
- containerID, ex);
- checksumInfoBuilder = ContainerProtos.ContainerChecksumInfo.newBuilder();
- }
+ ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = readOrCreate(data).toBuilder();
ContainerProtos.ContainerMerkleTree treeProto = captureLatencyNs(metrics.getCreateMerkleTreeLatencyNS(),
tree::toProto);
@@ -129,16 +121,7 @@ public void markBlocksAsDeleted(KeyValueContainerData data, Collection<Long> del
Lock writeLock = getLock(containerID);
writeLock.lock();
try {
- ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = null;
- try {
- // If the file is not present, we will create the data for the first time. This happens under a write lock.
- checksumInfoBuilder = readBuilder(data)
- .orElse(ContainerProtos.ContainerChecksumInfo.newBuilder());
- } catch (IOException ex) {
- LOG.error("Failed to read container checksum tree file for container {}. Overwriting it with a new instance.",
- data.getContainerID(), ex);
- checksumInfoBuilder = ContainerProtos.ContainerChecksumInfo.newBuilder();
- }
+ ContainerProtos.ContainerChecksumInfo.Builder checksumInfoBuilder = readOrCreate(data).toBuilder();
// Although the persisted block list should already be sorted, we will sort it here to make sure.
// This will automatically fix any bugs in the persisted order that may show up.
@@ -179,7 +162,7 @@ public ContainerDiffReport diff(ContainerProtos.ContainerChecksumInfo thisChecks
ContainerProtos.ContainerChecksumInfo peerChecksumInfo) throws
StorageContainerException {
- ContainerDiffReport report = new ContainerDiffReport();
+ ContainerDiffReport report = new ContainerDiffReport(thisChecksumInfo.getContainerID());
try {
captureLatencyNs(metrics.getMerkleTreeDiffLatencyNS(), () -> {
Preconditions.assertNotNull(thisChecksumInfo, "Datanode's checksum info is null.");
@@ -280,6 +263,8 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer
List<ContainerProtos.ChunkMerkleTree> thisChunkMerkleTreeList = thisBlockMerkleTree.getChunkMerkleTreeList();
List<ContainerProtos.ChunkMerkleTree> peerChunkMerkleTreeList = peerBlockMerkleTree.getChunkMerkleTreeList();
int thisIdx = 0, peerIdx = 0;
+ long containerID = report.getContainerID();
+ long blockID = thisBlockMerkleTree.getBlockID();
// Step 1: Process both lists while elements are present in both
while (thisIdx < thisChunkMerkleTreeList.size() && peerIdx < peerChunkMerkleTreeList.size()) {
@@ -293,8 +278,8 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer
// thisTree = Healthy, peerTree = unhealthy -> Do nothing as thisTree is healthy.
// thisTree = Unhealthy, peerTree = Unhealthy -> Do Nothing as both are corrupt.
if (thisChunkMerkleTree.getDataChecksum() != peerChunkMerkleTree.getDataChecksum() &&
- !thisChunkMerkleTree.getChecksumMatches() && peerChunkMerkleTree.getChecksumMatches()) {
- report.addCorruptChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree);
+ !thisChunkMerkleTree.getChecksumMatches()) {
+ reportChunkIfHealthy(containerID, blockID, peerChunkMerkleTree, report::addCorruptChunk);
}
thisIdx++;
peerIdx++;
@@ -304,14 +289,14 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer
thisIdx++;
} else {
// Peer chunk's offset is smaller; record missing chunk and advance peerIdx
- report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTree);
+ reportChunkIfHealthy(containerID, blockID, peerChunkMerkleTree, report::addMissingChunk);
peerIdx++;
}
}
// Step 2: Process remaining chunks in the peer list
while (peerIdx < peerChunkMerkleTreeList.size()) {
- report.addMissingChunk(peerBlockMerkleTree.getBlockID(), peerChunkMerkleTreeList.get(peerIdx));
+ reportChunkIfHealthy(containerID, blockID, peerChunkMerkleTreeList.get(peerIdx), report::addMissingChunk);
peerIdx++;
}
@@ -319,15 +304,18 @@ private void compareBlockMerkleTree(ContainerProtos.BlockMerkleTree thisBlockMer
// chunks from us when they reconcile.
}
- public static long getDataChecksum(ContainerProtos.ContainerChecksumInfo checksumInfo) {
- return checksumInfo.getContainerMerkleTree().getDataChecksum();
+ private void reportChunkIfHealthy(long containerID, long blockID, ContainerProtos.ChunkMerkleTree peerTree,
+ BiConsumer<Long, ContainerProtos.ChunkMerkleTree> addToReport) {
+ if (peerTree.getChecksumMatches()) {
+ addToReport.accept(blockID, peerTree);
+ } else {
+ LOG.warn("Skipping chunk at offset {} in block {} of container {} since peer reported it as " +
+ "unhealthy.", peerTree.getOffset(), blockID, containerID);
+ }
}
- /**
- * Returns whether the container checksum tree file for the specified container exists without deserializing it.
- */
- public static boolean hasContainerChecksumFile(ContainerData data) {
- return getContainerChecksumFile(data).exists();
+ public static long getDataChecksum(ContainerProtos.ContainerChecksumInfo checksumInfo) {
+ return checksumInfo.getContainerMerkleTree().getDataChecksum();
}
/**
@@ -348,21 +336,34 @@ private Lock getLock(long containerID) {
}
/**
+ * Reads the checksum info of the specified container. If the tree file with the information does not exist, an empty
+ * instance is returned.
* Callers are not required to hold a lock while calling this since writes are done to a tmp file and atomically
* swapped into place.
*/
- public Optional<ContainerProtos.ContainerChecksumInfo> read(ContainerData data) throws IOException {
+ public ContainerProtos.ContainerChecksumInfo read(ContainerData data) throws IOException {
try {
- return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(), () -> readChecksumInfo(data));
+ return captureLatencyNs(metrics.getReadContainerMerkleTreeLatencyNS(), () ->
+ readChecksumInfo(data).orElse(ContainerProtos.ContainerChecksumInfo.newBuilder().build()));
} catch (IOException ex) {
metrics.incrementMerkleTreeReadFailures();
- throw new IOException(ex);
+ throw ex;
}
}
- private Optional<ContainerProtos.ContainerChecksumInfo.Builder> readBuilder(ContainerData data) throws IOException {
- Optional<ContainerProtos.ContainerChecksumInfo> checksumInfo = read(data);
- return checksumInfo.map(ContainerProtos.ContainerChecksumInfo::toBuilder);
+ /**
+ * Reads the checksum info of the specified container. If the tree file with the information does not exist, or there
+ * is an exception trying to read the file, an empty instance is returned.
+ */
+ private ContainerProtos.ContainerChecksumInfo readOrCreate(ContainerData data) {
+ try {
+ // If the file is not present, we will create the data for the first time. This happens under a write lock.
+ return read(data);
+ } catch (IOException ex) {
+ LOG.error("Failed to read container checksum tree file for container {}. Overwriting it with a new instance.",
+ data.getContainerID(), ex);
+ return ContainerProtos.ContainerChecksumInfo.newBuilder().build();
+ }
}
/**
@@ -443,10 +444,4 @@ public static Optional<ContainerProtos.ContainerChecksumInfo> readChecksumInfo(C
public ContainerMerkleTreeMetrics getMetrics() {
return this.metrics;
}
-
- public static boolean checksumFileExist(Container<?> container) {
- File checksumFile = getContainerChecksumFile(container.getContainerData());
- return checksumFile.exists();
- }
-
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java
index 6de3057b00..882c9a7e83 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/checksum/ContainerDiffReport.java
@@ -31,11 +31,17 @@ public class ContainerDiffReport {
private final List<ContainerProtos.BlockMerkleTree> missingBlocks;
private final Map<Long, List<ContainerProtos.ChunkMerkleTree>> missingChunks;
private final Map<Long, List<ContainerProtos.ChunkMerkleTree>> corruptChunks;
+ private final long containerID;
- public ContainerDiffReport() {
+ public ContainerDiffReport(long containerID) {
this.missingBlocks = new ArrayList<>();
this.missingChunks = new HashMap<>();
this.corruptChunks = new HashMap<>();
+ this.containerID = containerID;
+ }
+
+ public long getContainerID() {
+ return containerID;
}
/**
@@ -105,7 +111,7 @@ public long getNumMissingBlocks() {
@Override
public String toString() {
- return "ContainerDiffReport:" +
+ return "Diff report for container " + containerID + ":" +
" Missing Blocks: " + getNumMissingBlocks() +
" Missing Chunks: " + getNumMissingChunks() + " chunks from " +
missingChunks.size() + " blocks" +
" Corrupt Chunks: " + getNumCorruptChunks() + " chunks from " +
corruptChunks.size() + " blocks";
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
index 11092fe1f7..f79c7e3f1d 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
@@ -99,6 +99,7 @@ public abstract class ContainerData {
// Checksum of the data within the container.
private long dataChecksum;
+ private static final long UNSET_DATA_CHECKSUM = -1;
private boolean isEmpty;
@@ -153,7 +154,7 @@ protected ContainerData(ContainerType type, long containerId,
this.originNodeId = originNodeId;
this.isEmpty = false;
this.checksum = ZERO_CHECKSUM;
- this.dataChecksum = 0;
+ this.dataChecksum = UNSET_DATA_CHECKSUM;
}
protected ContainerData(ContainerData source) {
@@ -538,13 +539,24 @@ public void computeAndSetContainerFileChecksum(Yaml yaml) throws IOException {
}
public void setDataChecksum(long dataChecksum) {
+ if (dataChecksum < 0) {
+ throw new IllegalArgumentException("Data checksum cannot be set to a negative number.");
+ }
this.dataChecksum = dataChecksum;
}
public long getDataChecksum() {
+ // UNSET_DATA_CHECKSUM is an internal placeholder, it should not be used outside this class.
+ if (needsDataChecksum()) {
+ return 0;
+ }
return dataChecksum;
}
+ public boolean needsDataChecksum() {
+ return dataChecksum == UNSET_DATA_CHECKSUM;
+ }
+
/**
* Returns a ProtoBuf Message from ContainerData.
*
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index a2dbdcef7f..b79c0b7805 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -81,7 +81,6 @@
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
-import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.locks.Lock;
@@ -1368,7 +1367,6 @@ public void markContainerForClose(Container container)
} finally {
container.writeUnlock();
}
- updateContainerChecksumFromMetadataIfNeeded(container);
ContainerLogger.logClosing(container.getContainerData());
sendICR(container);
}
@@ -1383,7 +1381,7 @@ public void updateContainerChecksum(Container container, ContainerMerkleTreeWrit
* Write the merkle tree for this container using the existing checksum metadata only. The data is not read or
* validated by this method, so it is expected to run quickly.
* <p>
- * If a checksum file already exists on the disk, this method will do nothing. The existing file would have either
+ * If a data checksum for the container already exists, this method does nothing. The existing value would have either
* been made from the metadata or data itself so there is no need to recreate it from the metadata. This method
* does not send an ICR with the updated checksum info.
* <p>
@@ -1391,7 +1389,7 @@ public void updateContainerChecksum(Container container, ContainerMerkleTreeWrit
* @param container The container which will have a tree generated.
*/
private void updateContainerChecksumFromMetadataIfNeeded(Container container) {
- if (ContainerChecksumTreeManager.checksumFileExist(container)) {
+ if (!container.getContainerData().needsDataChecksum()) {
return;
}
@@ -1435,24 +1433,24 @@ private ContainerProtos.ContainerChecksumInfo updateAndGetContainerChecksum(Cont
// checksum to prevent divergence from what SCM sees in the ICR vs what datanode peers will see when pulling the
// merkle tree.
long originalDataChecksum = containerData.getDataChecksum();
+ boolean hadDataChecksum = !containerData.needsDataChecksum();
ContainerProtos.ContainerChecksumInfo updateChecksumInfo = checksumManager.writeContainerDataTree(containerData,
treeWriter);
long updatedDataChecksum = updateChecksumInfo.getContainerMerkleTree().getDataChecksum();
if (updatedDataChecksum != originalDataChecksum) {
containerData.setDataChecksum(updatedDataChecksum);
- String message =
- "Container data checksum updated from " + checksumToString(originalDataChecksum) + " to " +
- checksumToString(updatedDataChecksum);
if (sendICR) {
sendICR(container);
}
- if (ContainerChecksumTreeManager.hasContainerChecksumFile(containerData)) {
+
+ String message = "Container " + containerData.getContainerID() + " data checksum updated from " +
+ checksumToString(originalDataChecksum) + " to " + checksumToString(updatedDataChecksum);
+ if (hadDataChecksum) {
LOG.warn(message);
ContainerLogger.logChecksumUpdated(containerData, originalDataChecksum);
} else {
- // If this is the first time the scanner has run with the feature to generate a checksum file, don't
- // log a warning for the checksum update.
+ // If this is the first time the checksum is being generated, don't log a warning about updating the checksum.
LOG.debug(message);
}
}
@@ -1465,7 +1463,6 @@ public void markContainerUnhealthy(Container container, ScanResult reason)
container.writeLock();
long containerID = 0L;
try {
- containerID = container.getContainerData().getContainerID();
if (container.getContainerState() == State.UNHEALTHY) {
LOG.debug("Call to mark already unhealthy container {} as unhealthy",
containerID);
@@ -1572,12 +1569,9 @@ public void reconcileContainer(DNContainerOperationClient dnClient, Container<?>
long containerID = containerData.getContainerID();
// Obtain the original checksum info before reconciling with any peers.
- Optional<ContainerProtos.ContainerChecksumInfo> optionalChecksumInfo = checksumManager.read(containerData);
- ContainerProtos.ContainerChecksumInfo originalChecksumInfo;
- if (optionalChecksumInfo.isPresent()) {
- originalChecksumInfo = optionalChecksumInfo.get();
- } else {
- // Try creating the checksum info from RocksDB metadata if it is not present.
+ ContainerProtos.ContainerChecksumInfo originalChecksumInfo = checksumManager.read(containerData);
+ if (!originalChecksumInfo.hasContainerMerkleTree()) {
+ // Try creating the merkle tree from RocksDB metadata if it is not present.
originalChecksumInfo = updateAndGetContainerChecksumFromMetadata(kvContainer);
}
// This holds our current most up-to-date checksum info that we are using for the container.
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
index 730c2c8b65..47842ab23a 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/ContainerMerkleTreeTestUtils.java
@@ -161,7 +161,7 @@ public static ContainerMerkleTreeWriter buildTestTree(ConfigurationSource conf,
int numCorruptChunks) {
ContainerProtos.ContainerMerkleTree.Builder treeBuilder = originalTree.toProto().toBuilder();
- ContainerDiffReport diff = new ContainerDiffReport();
+ ContainerDiffReport diff = new ContainerDiffReport(1);
introduceMissingBlocks(treeBuilder, numMissingBlocks, diff);
introduceMissingChunks(treeBuilder, numMissingChunks, diff);
@@ -323,12 +323,12 @@ private static void assertEqualsChunkMerkleTree(List<ContainerProtos.ChunkMerkle
}
/**
- * This function checks whether the container checksum file exists.
+ * This function checks whether the container checksum file exists for a container in a given datanode.
*/
public static boolean containerChecksumFileExists(HddsDatanodeService hddsDatanode, long containerID) {
OzoneContainer ozoneContainer = hddsDatanode.getDatanodeStateMachine().getContainer();
Container<?> container = ozoneContainer.getController().getContainer(containerID);
- return ContainerChecksumTreeManager.checksumFileExist(container);
+ return getContainerChecksumFile(container.getContainerData()).exists();
}
public static void writeContainerDataTreeProto(ContainerData data, ContainerProtos.ContainerMerkleTree tree)
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java
index 36b705c992..56592efe1a 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/checksum/TestContainerChecksumTreeManager.java
@@ -39,7 +39,6 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
-import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.Pair;
@@ -344,8 +343,8 @@ public void testContainerWithNoDiff() throws Exception {
ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder()
.setContainerID(container.getContainerID())
.setContainerMerkleTree(peerMerkleTree.toProto()).build();
- Optional<ContainerProtos.ContainerChecksumInfo> checksumInfo = checksumManager.read(container);
- ContainerDiffReport diff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo);
+ ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container);
+ ContainerDiffReport diff = checksumManager.diff(checksumInfo, peerChecksumInfo);
assertTrue(checksumManager.getMetrics().getMerkleTreeDiffLatencyNS().lastStat().total() > 0);
assertFalse(diff.needsRepair());
assertEquals(checksumManager.getMetrics().getNoRepairContainerDiffs(), 1);
@@ -368,8 +367,8 @@ public void testContainerDiffWithMismatches(int numMissingBlock, int numMissingC
ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder()
.setContainerID(container.getContainerID())
.setContainerMerkleTree(peerMerkleTree.toProto()).build();
- Optional<ContainerProtos.ContainerChecksumInfo> checksumInfo = checksumManager.read(container);
- ContainerDiffReport diff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo);
+ ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container);
+ ContainerDiffReport diff = checksumManager.diff(checksumInfo, peerChecksumInfo);
assertTrue(checksumManager.getMetrics().getMerkleTreeDiffLatencyNS().lastStat().total() > 0);
assertContainerDiffMatch(expectedDiff, diff);
assertEquals(1, checksumManager.getMetrics().getRepairContainerDiffs());
@@ -395,8 +394,8 @@ public void testPeerWithMismatchesHasNoDiff(int numMissingBlock, int numMissingC
ContainerProtos.ContainerChecksumInfo peerChecksumInfo = ContainerProtos.ContainerChecksumInfo.newBuilder()
.setContainerID(container.getContainerID())
.setContainerMerkleTree(peerMerkleTree).build();
- Optional<ContainerProtos.ContainerChecksumInfo> checksumInfo = checksumManager.read(container);
- ContainerDiffReport diff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo);
+ ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container);
+ ContainerDiffReport diff = checksumManager.diff(checksumInfo, peerChecksumInfo);
assertFalse(diff.needsRepair());
assertEquals(checksumManager.getMetrics().getNoRepairContainerDiffs(), 1);
assertEquals(0, checksumManager.getMetrics().getMissingBlocksIdentified());
@@ -409,8 +408,8 @@ public void testFailureContainerMerkleTreeMetric() throws IOException {
ContainerProtos.ContainerChecksumInfo peerChecksum = ContainerProtos.ContainerChecksumInfo.newBuilder().build();
ContainerMerkleTreeWriter ourMerkleTree = buildTestTree(config);
checksumManager.writeContainerDataTree(container, ourMerkleTree);
- Optional<ContainerProtos.ContainerChecksumInfo> checksumInfo = checksumManager.read(container);
- assertThrows(StorageContainerException.class, () -> checksumManager.diff(checksumInfo.get(), peerChecksum));
+ ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container);
+ assertThrows(StorageContainerException.class, () -> checksumManager.diff(checksumInfo, peerChecksum));
assertEquals(checksumManager.getMetrics().getMerkleTreeDiffFailure(), 1);
}
@@ -436,8 +435,8 @@ void testDeletedBlocksInPeerAndBoth() throws Exception {
.addAllDeletedBlocks(deletedBlockList).build();
writeContainerDataTreeProto(container, ourMerkleTree);
- Optional<ContainerProtos.ContainerChecksumInfo> checksumInfo = checksumManager.read(container);
- ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo);
+ ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container);
+ ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo);
// The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted
// in peer merkle tree.
@@ -448,7 +447,7 @@ void testDeletedBlocksInPeerAndBoth() throws Exception {
// Delete blocks in our merkle tree as well.
checksumManager.markBlocksAsDeleted(container, blockIDs);
checksumInfo = checksumManager.read(container);
- containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo);
+ containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo);
// The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted
// in both merkle tree.
@@ -474,8 +473,8 @@ void testDeletedBlocksInOurContainerOnly() throws Exception {
writeContainerDataTreeProto(container, ourMerkleTree);
checksumManager.markBlocksAsDeleted(container, deletedBlockList);
- Optional<ContainerProtos.ContainerChecksumInfo> checksumInfo = checksumManager.read(container);
- ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo);
+ ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container);
+ ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo);
// The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted
// in our merkle tree.
@@ -507,8 +506,8 @@ void testCorruptionInOurMerkleTreeAndDeletedBlocksInPeer() throws Exception {
writeContainerDataTreeProto(container, ourMerkleTree);
- Optional<ContainerProtos.ContainerChecksumInfo> checksumInfo = checksumManager.read(container);
- ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo);
+ ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container);
+ ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo);
// The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted
// in peer merkle tree.
@@ -539,8 +538,8 @@ void testContainerDiffWithBlockDeletionInPeer() throws Exception {
writeContainerDataTreeProto(container, ourMerkleTree);
ContainerProtos.ContainerChecksumInfo peerChecksumInfo = peerChecksumInfoBuilder.build();
- Optional<ContainerProtos.ContainerChecksumInfo> checksumInfo = checksumManager.read(container);
- ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo);
+ ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(container);
+ ContainerDiffReport containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo);
// The diff should not have any missing block/missing chunk/corrupt chunks as the blocks are deleted
// in peer merkle tree.
assertFalse(containerDiff.getMissingBlocks().isEmpty());
@@ -553,7 +552,7 @@ void testContainerDiffWithBlockDeletionInPeer() throws Exception {
// Clear deleted blocks to add them in missing blocks.
peerChecksumInfo = peerChecksumInfoBuilder.clearDeletedBlocks().build();
checksumInfo = checksumManager.read(container);
- containerDiff = checksumManager.diff(checksumInfo.get(), peerChecksumInfo);
+ containerDiff = checksumManager.diff(checksumInfo, peerChecksumInfo);
assertFalse(containerDiff.getMissingBlocks().isEmpty());
// Missing block does not contain the deleted blocks 6L to 10L
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java
index 6af2e00ade..5d674de7fc 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestKeyValueContainerData.java
@@ -18,6 +18,9 @@
package org.apache.hadoop.ozone.container.common;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import java.util.UUID;
@@ -116,4 +119,30 @@ public void testKeyValueData(ContainerTestVersionInfo versionInfo) {
assertEquals(kvData.getSchemaVersion(), newKvData.getSchemaVersion());
}
+ @ContainerTestVersionInfo.ContainerTest
+ public void testNeedsDataChecksum(ContainerTestVersionInfo versionInfo) {
+ initVersionInfo(versionInfo);
+
+ KeyValueContainerData containerData = new KeyValueContainerData(1, layout, MAXSIZE, UUID.randomUUID().toString(),
+ UUID.randomUUID().toString());
+
+ // When the container is initially created without a checksum, the checksum will be 0 but the container still
+ // indicates it needs the actual one generated.
+ assertFalse(containerData.isEmpty());
+ assertTrue(containerData.needsDataChecksum());
+ assertEquals(0, containerData.getDataChecksum());
+
+ // Once the setter is called with any value, the container should no longer consider the checksum missing.
+ containerData.setDataChecksum(0);
+ assertFalse(containerData.needsDataChecksum());
+ assertEquals(0, containerData.getDataChecksum());
+
+ containerData.setDataChecksum(123L);
+ assertFalse(containerData.isEmpty());
+ assertFalse(containerData.needsDataChecksum());
+ assertEquals(123L, containerData.getDataChecksum());
+
+ assertThrows(IllegalArgumentException.class, () -> containerData.setDataChecksum(-1L),
+ "Negative checksum value should throw an exception.");
+ }
}
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java
index 7fce76a0e4..68b144d97b 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestContainerReconciliationWithMockDatanodes.java
@@ -27,6 +27,7 @@
import static org.assertj.core.api.Assertions.fail;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyLong;
@@ -159,12 +160,16 @@ public static void setup() throws Exception {
// Use this fake host name to track the node through the test since it's easier to visualize than a UUID.
dnDetails.setHostName("dn" + (i + 1));
MockDatanode dn = new MockDatanode(dnDetails, containerDir);
+ // This will close the container and build a data checksum based on the chunk checksums in the metadata.
dn.addContainerWithBlocks(CONTAINER_ID, 15);
datanodes.add(dn);
}
+ long dataChecksumFromMetadata = assertUniqueChecksumCount(CONTAINER_ID, datanodes, 1);
+ assertNotEquals(0, dataChecksumFromMetadata);
datanodes.forEach(d -> d.scanContainer(CONTAINER_ID));
healthyDataChecksum = assertUniqueChecksumCount(CONTAINER_ID, datanodes, 1);
+ assertEquals(dataChecksumFromMetadata, healthyDataChecksum);
// Do not count the initial synchronous scan to build the merkle tree towards the scan count in the tests.
// This lets each test run start counting the number of scans from zero.
datanodes.forEach(MockDatanode::resetOnDemandScanCount);
@@ -352,10 +357,9 @@ public long checkAndGetDataChecksum(long containerID) {
KeyValueContainer container = getContainer(containerID);
long dataChecksum = 0;
try {
- Optional<ContainerProtos.ContainerChecksumInfo> containerChecksumInfo =
- handler.getChecksumManager().read(container.getContainerData());
- assertTrue(containerChecksumInfo.isPresent());
- dataChecksum = containerChecksumInfo.get().getContainerMerkleTree().getDataChecksum();
+ ContainerProtos.ContainerChecksumInfo containerChecksumInfo = handler.getChecksumManager()
+ .read(container.getContainerData());
+ dataChecksum = containerChecksumInfo.getContainerMerkleTree().getDataChecksum();
assertEquals(container.getContainerData().getDataChecksum(), dataChecksum);
} catch (IOException ex) {
fail("Failed to read container checksum from disk", ex);
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
index 92e7a071cc..989dc0bddb 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainerCheck.java
@@ -38,7 +38,6 @@
import java.util.Arrays;
import java.util.List;
import java.util.Map;
-import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
@@ -200,9 +199,8 @@ public void testAllDataErrorsCollected(ContainerTestVersionInfo versionInfo) thr
checksumManager.writeContainerDataTree(containerData, result.getDataTree());
// This will read the corrupted tree from the disk, which represents the current state of the container, and
// compare it against the original healthy tree. The diff we get back should match the failures we injected.
- Optional<ContainerProtos.ContainerChecksumInfo> generatedChecksumInfo = checksumManager.read(containerData);
- assertTrue(generatedChecksumInfo.isPresent());
- ContainerDiffReport diffReport = checksumManager.diff(generatedChecksumInfo.get(), healthyChecksumInfo);
+ ContainerProtos.ContainerChecksumInfo generatedChecksumInfo = checksumManager.read(container.getContainerData());
+ ContainerDiffReport diffReport = checksumManager.diff(generatedChecksumInfo, healthyChecksumInfo);
LOG.info("Diff of healthy container with actual container {}", diffReport);
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
index 97fd5a4b8c..a102dd4a5f 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandler.java
@@ -383,7 +383,8 @@ public void testCloseInvalidContainer(ContainerLayoutVersion layoutVersion)
// Closing invalid container should return error response.
ContainerProtos.ContainerCommandResponseProto response =
keyValueHandler.handleCloseContainer(closeContainerRequest, container);
- assertTrue(ContainerChecksumTreeManager.checksumFileExist(container));
+ // Checksum will not be generated for an invalid container.
+ assertFalse(ContainerChecksumTreeManager.getContainerChecksumFile(kvData).exists());
assertEquals(ContainerProtos.Result.INVALID_CONTAINER_STATE, response.getResult(),
@@ -679,7 +680,8 @@ public void testUpdateContainerChecksum(ContainerLayoutVersion layoutVersion) th
// Initially, container should have no checksum information.
assertEquals(0, containerData.getDataChecksum());
- assertFalse(checksumManager.read(containerData).isPresent());
+ assertFalse(checksumManager.read(containerData).hasContainerMerkleTree());
+ assertFalse(ContainerChecksumTreeManager.getContainerChecksumFile(containerData).exists());
assertEquals(0, icrCount.get());
// Update container with checksum information.
@@ -689,7 +691,7 @@ public void testUpdateContainerChecksum(ContainerLayoutVersion layoutVersion) th
// Check checksum in memory.
assertEquals(updatedDataChecksum, containerData.getDataChecksum());
// Check disk content.
- ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(containerData).get();
+ ContainerProtos.ContainerChecksumInfo checksumInfo = checksumManager.read(containerData);
assertTreesSortedAndMatch(treeWriter.toProto(), checksumInfo.getContainerMerkleTree());
}
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java
index 343c8dcfee..8361959e6d 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueHandlerWithUnhealthyContainer.java
@@ -238,14 +238,14 @@ public void testMarkContainerUnhealthyInFailedVolume() throws IOException {
// be ignored.
hddsVolume.setState(StorageVolume.VolumeState.FAILED);
handler.markContainerUnhealthy(container, ContainerTestUtils.getUnhealthyDataScanResult());
- assertFalse(ContainerChecksumTreeManager.checksumFileExist(container));
+ assertFalse(ContainerChecksumTreeManager.getContainerChecksumFile(kvData).exists());
verify(mockIcrSender, never()).send(any());
// When volume is healthy, ICR should be sent when container is marked
// unhealthy.
hddsVolume.setState(StorageVolume.VolumeState.NORMAL);
handler.markContainerUnhealthy(container, ContainerTestUtils.getUnhealthyDataScanResult());
- assertTrue(ContainerChecksumTreeManager.checksumFileExist(container));
+ assertTrue(ContainerChecksumTreeManager.getContainerChecksumFile(kvData).exists());
verify(mockIcrSender, atMostOnce()).send(any());
}
diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReconcileSubcommand.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReconcileSubcommand.java
index b1d544f480..79df162cf0 100644
--- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReconcileSubcommand.java
+++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/container/ReconcileSubcommand.java
@@ -41,7 +41,7 @@ public class ReconcileSubcommand extends ScmSubcommand {
public void execute(ScmClient scmClient) throws IOException {
scmClient.reconcileContainer(containerId);
System.out.println("Reconciliation has been triggered for container " +
containerId);
- // TODO a better option to check status may be added later.
+ // TODO HDDS-12078 allow status to be checked from the reconcile
subcommand directly.
System.out.println("Use \"ozone admin container info --json " +
containerId + "\" to see the checksums of each " +
"container replica");
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java
index 2306af221c..3c761f532f 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestBackgroundContainerDataScannerIntegration.java
@@ -17,6 +17,7 @@
package org.apache.hadoop.ozone.dn.scanner;
+import static org.apache.hadoop.hdds.HddsUtils.checksumToString;
import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED;
import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY;
import static org.apache.hadoop.ozone.container.checksum.ContainerMerkleTreeTestUtils.readChecksumFile;
@@ -27,15 +28,12 @@
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
-import org.apache.hadoop.ozone.container.common.utils.ContainerLogger;
import org.apache.hadoop.ozone.container.keyvalue.TestContainerCorruptions;
import org.apache.hadoop.ozone.container.ozoneimpl.BackgroundContainerDataScanner;
import org.apache.hadoop.ozone.container.ozoneimpl.ContainerScannerConfiguration;
import org.apache.ozone.test.GenericTestUtils;
-import org.apache.ozone.test.GenericTestUtils.LogCapturer;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
@@ -47,9 +45,6 @@
class TestBackgroundContainerDataScannerIntegration
extends TestContainerScannerIntegrationAbstract {
- private final LogCapturer logCapturer =
- LogCapturer.log4j2(ContainerLogger.LOG_NAME);
-
@BeforeAll
static void init() throws Exception {
OzoneConfiguration ozoneConfig = new OzoneConfiguration();
@@ -83,10 +78,12 @@ void testCorruptionDetected(TestContainerCorruptions corruption)
Container<?> container = getDnContainer(containerID);
assertEquals(State.CLOSED, container.getContainerState());
assertTrue(containerChecksumFileExists(containerID));
+ assertFalse(container.getContainerData().needsDataChecksum());
+ assertNotEquals(0, container.getContainerData().getDataChecksum());
waitForScmToSeeReplicaState(containerID, CLOSED);
long initialReportedDataChecksum = getContainerReplica(containerID).getDataChecksum();
-
+ assertNotEquals(0, initialReportedDataChecksum);
corruption.applyTo(container);
resumeScanner();
@@ -103,23 +100,28 @@ void testCorruptionDetected(TestContainerCorruptions corruption)
corruption == TestContainerCorruptions.MISSING_CONTAINER_DIR) {
// In these cases, the new tree will not be able to be written since it exists in the metadata directory.
// When the tree write fails, the in-memory checksum should remain at its original value.
- assertEquals(initialReportedDataChecksum, newReportedDataChecksum);
- assertFalse(containerChecksumFileExists(containerID));
+ assertEquals(checksumToString(initialReportedDataChecksum), checksumToString(newReportedDataChecksum));
} else {
- assertNotEquals(initialReportedDataChecksum, newReportedDataChecksum);
+ assertNotEquals(checksumToString(initialReportedDataChecksum), checksumToString(newReportedDataChecksum));
// Test that the scanner wrote updated checksum info to the disk.
- assertTrue(containerChecksumFileExists(containerID));
- ContainerProtos.ContainerChecksumInfo updatedChecksumInfo = readChecksumFile(container.getContainerData());
- assertEquals(newReportedDataChecksum, updatedChecksumInfo.getContainerMerkleTree().getDataChecksum());
+ assertReplicaChecksumMatches(container, newReportedDataChecksum);
+ assertFalse(container.getContainerData().needsDataChecksum());
}
if (corruption == TestContainerCorruptions.TRUNCATED_BLOCK ||
corruption == TestContainerCorruptions.CORRUPT_BLOCK) {
// These errors will affect multiple chunks and result in multiple log messages.
- corruption.assertLogged(containerID, logCapturer);
+ corruption.assertLogged(containerID, getContainerLogCapturer());
} else {
// Other corruption types will only lead to a single error.
- corruption.assertLogged(containerID, 1, logCapturer);
+ corruption.assertLogged(containerID, 1, getContainerLogCapturer());
}
}
+
+ private void assertReplicaChecksumMatches(Container<?> container, long expectedChecksum) throws Exception {
+ assertTrue(containerChecksumFileExists(container.getContainerData().getContainerID()));
+ long dataChecksumFromFile = readChecksumFile(container.getContainerData())
+ .getContainerMerkleTree().getDataChecksum();
+ assertEquals(checksumToString(expectedChecksum), checksumToString(dataChecksumFromFile));
+ }
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java
index 2584138c12..3133a81b5c 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/scanner/TestContainerScannerIntegrationAbstract.java
@@ -22,6 +22,7 @@
import static org.apache.hadoop.hdds.client.ReplicationType.RATIS;
import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import java.io.IOException;
@@ -68,6 +69,9 @@ public abstract class TestContainerScannerIntegrationAbstract {
private static String volumeName;
private static String bucketName;
private static OzoneBucket bucket;
+ // Log4j 2 capturer currently doesn't support capturing specific logs.
+ // We must use one capturer for both the container and application logs.
+ private final GenericTestUtils.LogCapturer logCapturer = GenericTestUtils.LogCapturer.log4j2("");
public static void buildCluster(OzoneConfiguration ozoneConfig)
throws Exception {
@@ -166,6 +170,13 @@ protected void closeContainerAndWait(long containerID) throws Exception {
() -> TestHelper.isContainerClosed(cluster, containerID,
cluster.getHddsDatanodes().get(0).getDatanodeDetails()),
1000, 5000);
+
+ // After the container is marked as closed in the datanode, we must wait for the checksum generation from metadata
+ // to finish.
+ LambdaTestUtils.await(5000, 1000, () ->
+ getContainerReplica(containerID).getDataChecksum() != 0);
+ long closedChecksum = getContainerReplica(containerID).getDataChecksum();
+ assertNotEquals(0, closedChecksum);
}
protected long writeDataToOpenContainer() throws Exception {
@@ -201,6 +212,10 @@ protected void readFromCorruptedKey(String keyName) throws IOException {
}
}
+ protected GenericTestUtils.LogCapturer getContainerLogCapturer() {
+ return logCapturer;
+ }
+
private OzoneOutputStream createKey(String keyName) throws Exception {
return TestHelper.createKey(
keyName, RATIS, ONE, 0, store, volumeName, bucketName);