This is an automated email from the ASF dual-hosted git repository.
ashishkr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 9ccfb98f782 HDDS-14416. Handle delete ratis replica in
DELETED/DELETING container scenario. (#9630)
9ccfb98f782 is described below
commit 9ccfb98f7822e37b9f690e32af6158e693dae04a
Author: Ashish Kumar <[email protected]>
AuthorDate: Fri Jan 23 09:44:11 2026 +0530
HDDS-14416. Handle delete ratis replica in DELETED/DELETING container
scenario. (#9630)
---
.../container/AbstractContainerReportHandler.java | 6 +
.../scm/container/TestContainerReportHandler.java | 23 ++-
.../scm/container/TestContainerStateManager.java | 204 ++++++++++++++++++++-
.../container/TestContainerReportHandling.java | 32 +++-
.../TestContainerReportHandlingWithHA.java | 35 ++--
5 files changed, 267 insertions(+), 33 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java
index 35908afff87..f79507e1e66 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java
@@ -317,6 +317,12 @@ private boolean updateContainerState(final DatanodeDetails
datanode,
case DELETING:
// HDDS-11136: If a DELETING container has a non-empty CLOSED replica,
transition the container to CLOSED
// HDDS-12421: If a DELETING or DELETED container has a non-empty
replica, transition the container to CLOSED
+ if (replica.getState() == State.CLOSED &&
replica.getBlockCommitSequenceId() <= container.getSequenceId()
+ &&
container.getReplicationType().equals(HddsProtos.ReplicationType.RATIS)) {
+ deleteReplica(containerId, datanode, publisher, "DELETED", true,
detailsForLogging);
+ // We should not move back to CLOSED state if replica bcsid <=
container bcsid
+ return false;
+ }
boolean replicaStateAllowed = (replica.getState() != State.INVALID &&
replica.getState() != State.DELETED);
if (!replicaIsEmpty && replicaStateAllowed) {
getLogger().info("transitionDeletingToClosed due to non-empty CLOSED
replica (keyCount={}) for {}",
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java
index edd224fbdaa..0e8eee799bc 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java
@@ -83,7 +83,6 @@
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.EnumSource;
import org.junit.jupiter.params.provider.MethodSource;
-import org.junit.jupiter.params.provider.ValueSource;
/**
* Test the behaviour of the ContainerReportHandler.
@@ -204,6 +203,12 @@ static Stream<Arguments> containerAndReplicaStates() {
replicaState.equals(ContainerReplicaProto.State.QUASI_CLOSED)) {
continue;
}
+ if (replicationType == HddsProtos.ReplicationType.RATIS &&
+ replicaState.equals(ContainerReplicaProto.State.CLOSED) &&
+ (containerState.equals(HddsProtos.LifeCycleState.DELETED) ||
+ containerState.equals(HddsProtos.LifeCycleState.DELETING))) {
+ continue;
+ }
for (ContainerReplicaProto.State invalidState :
invalidReplicaStates) {
combinations.add(Arguments.of(replicationType, containerState,
replicaState, invalidState));
}
@@ -1142,9 +1147,8 @@ public void
closedECContainerKeyAndBytesUsedUpdatedToMinimumOfAllReplicas()
.getNumberOfKeys());
}
- @ParameterizedTest
- @ValueSource(booleans = {false, true})
- public void testStaleReplicaOfDeletedContainer(boolean isEmpty) throws
NodeNotFoundException, IOException {
+ @Test
+ public void testStaleReplicaOfDeletedContainer() throws
NodeNotFoundException, IOException {
final ContainerReportHandler reportHandler = new
ContainerReportHandler(nodeManager, containerManager);
final Iterator<DatanodeDetails> nodeIterator = nodeManager.getNodes(
@@ -1161,18 +1165,13 @@ public void testStaleReplicaOfDeletedContainer(boolean
isEmpty) throws NodeNotFo
final ContainerReportsProto containerReport = getContainerReportsProto(
containerOne.containerID(), ContainerReplicaProto.State.CLOSED,
- datanodeOne.getUuidString(), 0, isEmpty);
+ datanodeOne.getUuidString(), 0, true);
final ContainerReportFromDatanode containerReportFromDatanode =
new ContainerReportFromDatanode(datanodeOne, containerReport);
reportHandler.onMessage(containerReportFromDatanode, publisher);
- if (isEmpty) {
- // Expect the replica to be deleted when it is empty
- verify(publisher, times(1)).fireEvent(any(),
any(CommandForDatanode.class));
- } else {
- // Expect the replica to stay when it is NOT empty
- verify(publisher, times(0)).fireEvent(any(),
any(CommandForDatanode.class));
- }
+ // Expect the replica to be deleted when it is empty
+ verify(publisher, times(1)).fireEvent(any(),
any(CommandForDatanode.class));
assertEquals(1,
containerManager.getContainerReplicas(containerOne.containerID()).size());
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java
index 799ea3fbcd5..869b7dd5bb9 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java
@@ -17,11 +17,18 @@
package org.apache.hadoop.hdds.scm.container;
+import static
org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails;
+import static org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer;
+import static org.apache.hadoop.hdds.scm.HddsTestUtils.getECContainer;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.fail;
+import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.io.File;
@@ -32,6 +39,7 @@
import java.util.Set;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.hdds.HddsConfigKeys;
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@@ -39,23 +47,33 @@
import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
import
org.apache.hadoop.hdds.scm.container.common.helpers.InvalidContainerStateException;
import
org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaPendingOps;
+import org.apache.hadoop.hdds.scm.events.SCMEvents;
+import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.ha.SCMHAManager;
import org.apache.hadoop.hdds.scm.ha.SCMHAManagerStub;
import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition;
+import org.apache.hadoop.hdds.scm.node.NodeStatus;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
+import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher;
+import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.hdds.utils.db.DBStore;
import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
+import
org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException;
+import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
+import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
+import org.mockito.ArgumentCaptor;
/**
* Testing ContainerStatemanager.
@@ -67,9 +85,13 @@ public class TestContainerStateManager {
private File testDir;
private DBStore dbStore;
private Pipeline pipeline;
+ private MockNodeManager nodeManager;
+ private ContainerManager containerManager;
+ private SCMContext scmContext;
+ private EventPublisher publisher;
@BeforeEach
- public void init() throws IOException, TimeoutException {
+ public void init() throws IOException, TimeoutException,
InvalidStateTransitionException {
OzoneConfiguration conf = new OzoneConfiguration();
SCMHAManager scmhaManager = SCMHAManagerStub.getInstance(true);
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath());
@@ -95,6 +117,48 @@ public void init() throws IOException, TimeoutException {
Clock.system(ZoneId.systemDefault()), null))
.build();
+ nodeManager = new MockNodeManager(true, 10);
+ containerManager = mock(ContainerManager.class);
+ scmContext = SCMContext.emptyContext();
+ scmContext.updateLeaderAndTerm(true, 1L);
+ publisher = mock(EventPublisher.class);
+
+ when(containerManager.getContainer(any(ContainerID.class)))
+ .thenAnswer(invocation -> containerStateManager
+ .getContainer((ContainerID) invocation.getArguments()[0]));
+
+ when(containerManager.getContainerReplicas(any(ContainerID.class)))
+ .thenAnswer(invocation -> containerStateManager
+ .getContainerReplicas((ContainerID) invocation.getArguments()[0]));
+
+ doAnswer(invocation -> {
+ containerStateManager.updateContainerStateWithSequenceId(
+ ((ContainerID) invocation.getArguments()[0]).getProtobuf(),
+ (HddsProtos.LifeCycleEvent) invocation.getArguments()[1], 0L);
+ return null;
+ }).when(containerManager).updateContainerState(
+ any(ContainerID.class), any(HddsProtos.LifeCycleEvent.class));
+
+ doAnswer(invocation -> {
+ containerStateManager.updateContainerReplica(
+ (ContainerReplica) invocation.getArguments()[1]);
+ return null;
+ }).when(containerManager).updateContainerReplica(
+ any(ContainerID.class), any(ContainerReplica.class));
+
+ doAnswer(invocation -> {
+ containerStateManager.removeContainerReplica(
+ (ContainerReplica) invocation.getArguments()[1]);
+ return null;
+ }).when(containerManager).removeContainerReplica(
+ any(ContainerID.class), any(ContainerReplica.class));
+
+ doAnswer(invocation -> {
+ containerStateManager.transitionDeletingOrDeletedToClosedState(
+ ((ContainerID) invocation.getArgument(0)).getProtobuf());
+ return null;
+
}).when(containerManager).transitionDeletingOrDeletedToClosedState(any(ContainerID.class));
+
}
@AfterEach
@@ -197,6 +261,115 @@ public void
testTransitionContainerToClosedStateAllowOnlyDeletingOrDeletedContai
}
}
+ /**
+ * DELETED container + CLOSED replica with BCSID <= container seqId + RATIS
replication.
+ * Expected: Force delete command should be sent (force=true)
+ */
+ @Test
+ public void testDeletedContainerWithStaleClosedReplicaRatis()
+ throws IOException {
+ final ContainerInfo container =
getContainer(HddsProtos.LifeCycleState.DELETED);
+ containerStateManager.addContainer(container.getProtobuf());
+ assertEquals(HddsProtos.ReplicationType.RATIS,
container.getReplicationType());
+ final DatanodeDetails datanode = nodeManager.getNodes(
+ NodeStatus.inServiceHealthy()).iterator().next();
+ // Report CLOSED replica with BCSID = 10000L (equal to container's seqId)
+ DeleteContainerCommand deleteCmd = sendReportAndCaptureDeleteCommand(
+ container, datanode, 10000L, false, 0, true);
+ // Verify force delete
+ verifyForceDeleteCommand(deleteCmd, container.containerID(), true,
+ "Delete command should have force=true for stale RATIS replica");
+ verifyContainerState(container.containerID(),
HddsProtos.LifeCycleState.DELETED);
+ }
+
+ /**
+ * Test: DELETED container + CLOSED replica with BCSID < container seqId +
RATIS.
+ * Expected: Force delete command should be sent (BCSID is lower)
+ */
+ @Test
+ public void testDeletedContainerWithLowerBcsidStaleReplicaRatis()
+ throws IOException {
+ final ContainerInfo container =
getContainer(HddsProtos.LifeCycleState.DELETED);
+ containerStateManager.addContainer(container.getProtobuf());
+ final DatanodeDetails datanode = nodeManager.getNodes(
+ NodeStatus.inServiceHealthy()).iterator().next();
+ // Report CLOSED replica with BCSID = 9000L (lower than container's 10000L)
+ DeleteContainerCommand deleteCmd = sendReportAndCaptureDeleteCommand(
+ container, datanode, 9000L, false, 0, true);
+ verifyForceDeleteCommand(deleteCmd, container.containerID(), true,
+ "Delete command should have force=true for stale RATIS replica with
lower BCSID");
+ }
+
+ /**
+ * DELETED EC container + CLOSED replica with BCSID <= container seqId.
+ * Expected: Should NOT send force delete
+ * Should transition to CLOSED instead
+ */
+ @Test
+ public void
testDeletedECContainerWithStaleClosedReplicaShouldNotForceDelete()
+ throws IOException {
+ final DatanodeDetails datanode = randomDatanodeDetails();
+ nodeManager.register(datanode, null, null);
+ // Create a DELETED EC container
+ ECReplicationConfig repConfig = new ECReplicationConfig(3, 2);
+ final ContainerInfo ecContainer = getECContainer(
+ HddsProtos.LifeCycleState.DELETED,
+ PipelineID.randomId(),
+ repConfig);
+ containerStateManager.addContainer(ecContainer.getProtobuf());
+ assertEquals(HddsProtos.ReplicationType.EC,
ecContainer.getReplicationType());
+ // Report CLOSED replica with BCSID = container's seqId
+ sendReportAndCaptureDeleteCommand(ecContainer, datanode,
+ ecContainer.getSequenceId(), false, 1, false);
+ // Container should transition to CLOSED
+ verifyContainerState(ecContainer.containerID(),
HddsProtos.LifeCycleState.CLOSED);
+ }
+
+ private DeleteContainerCommand sendReportAndCaptureDeleteCommand(
+ ContainerInfo container, DatanodeDetails datanode,
+ long bcsId, boolean isEmpty, int replicaIndex, boolean reqCommandSend) {
+ final ContainerReportHandler reportHandler =
+ new ContainerReportHandler(nodeManager, containerManager, scmContext,
null);
+ final StorageContainerDatanodeProtocolProtos.ContainerReportsProto
containerReport =
+ getContainerReportsProto(container.containerID(),
+ ContainerReplicaProto.State.CLOSED,
+ datanode.getUuidString(),
+ bcsId,
+ isEmpty,
+ replicaIndex);
+ final SCMDatanodeHeartbeatDispatcher.ContainerReportFromDatanode
reportFromDatanode =
+ new
SCMDatanodeHeartbeatDispatcher.ContainerReportFromDatanode(datanode,
containerReport);
+ reportHandler.onMessage(reportFromDatanode, publisher);
+ // Capture the delete command
+ ArgumentCaptor<CommandForDatanode<?>> commandCaptor =
+ ArgumentCaptor.forClass(CommandForDatanode.class);
+ if (reqCommandSend) {
+ verify(publisher, times(1))
+ .fireEvent(eq(SCMEvents.DATANODE_COMMAND), commandCaptor.capture());
+ CommandForDatanode<?> capturedCommand = commandCaptor.getValue();
+ assertEquals(DeleteContainerCommand.class,
capturedCommand.getCommand().getClass());
+ return (DeleteContainerCommand) capturedCommand.getCommand();
+ } else {
+ verify(publisher, times(0))
+ .fireEvent(eq(SCMEvents.DATANODE_COMMAND), commandCaptor.capture());
+ return null;
+ }
+ }
+
+ private void verifyForceDeleteCommand(DeleteContainerCommand deleteCmd,
+ ContainerID expectedContainerId, boolean expectedForce, String message) {
+ assertEquals(expectedForce, deleteCmd.isForce(), message);
+ assertEquals(expectedContainerId.getId(), deleteCmd.getContainerID());
+ }
+
+ /**
+ * Verifies the container is in the expected state.
+ */
+ private void verifyContainerState(ContainerID containerId,
+ HddsProtos.LifeCycleState expectedState) throws IOException {
+ assertEquals(expectedState,
containerManager.getContainer(containerId).getState());
+ }
+
@Test
public void testSequenceIdOnStateUpdate() throws Exception {
ContainerID containerID = ContainerID.valueOf(3L);
@@ -264,4 +437,33 @@ private ContainerInfo allocateContainer()
return containerInfo;
}
+ private static StorageContainerDatanodeProtocolProtos.ContainerReportsProto
getContainerReportsProto(
+ final ContainerID containerId,
+ final ContainerReplicaProto.State state,
+ final String originNodeId,
+ final long bcsId,
+ final boolean isEmpty,
+ final int replicaIndex) {
+ final StorageContainerDatanodeProtocolProtos.ContainerReportsProto.Builder
crBuilder =
+
StorageContainerDatanodeProtocolProtos.ContainerReportsProto.newBuilder();
+ final ContainerReplicaProto replicaProto =
+ ContainerReplicaProto.newBuilder()
+ .setContainerID(containerId.getProtobuf().getId())
+ .setState(state)
+ .setOriginNodeId(originNodeId)
+ .setSize(5368709120L)
+ .setUsed(isEmpty ? 0L : 2000000000L)
+ .setKeyCount(isEmpty ? 0L : 100000000L)
+ .setReadCount(100000000L)
+ .setWriteCount(100000000L)
+ .setReadBytes(2000000000L)
+ .setWriteBytes(2000000000L)
+ .setBlockCommitSequenceId(bcsId)
+ .setDeleteTransactionId(0)
+ .setReplicaIndex(replicaIndex)
+ .setIsEmpty(isEmpty)
+ .build();
+ return crBuilder.addReports(replicaProto).build();
+ }
+
}
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
index 5327fc78bf0..3617bd2c219 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
@@ -25,6 +25,7 @@
import static
org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -36,10 +37,11 @@
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
-import org.apache.hadoop.ozone.HddsDatanodeService;
+import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.TestDataUtil;
import org.apache.hadoop.ozone.client.ObjectStore;
@@ -50,6 +52,7 @@
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
+import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
@@ -62,16 +65,16 @@ public class TestContainerReportHandling {
private static final String KEY = "key1";
/**
- * Tests that a DELETING (or DELETED) container moves to the CLOSED state if
a non-empty replica is reported.
+ * Tests that a DELETING (or DELETED) container replica gets deleted when
replica bcsid <= container bcsid
* To do this, the test first creates a key and closes its corresponding
container. Then it moves that container to
* DELETING (or DELETED) state using ContainerManager. Then it restarts a
Datanode hosting that container,
* making it send a full container report.
- * Finally, the test waits for the container to move from DELETING (or
DELETED) to CLOSED.
+ * Finally, the test waits for every replica of the DELETING (or DELETED)
container to be deleted, because replica bcsid <= container bcsid.
*/
@ParameterizedTest
@EnumSource(value = HddsProtos.LifeCycleState.class,
names = {"DELETING", "DELETED"})
- void
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsReported(
+ void testDeletingOrDeletedContainerWhenNonEmptyReplicaIsReported(
HddsProtos.LifeCycleState desiredState)
throws Exception {
OzoneConfiguration conf = new OzoneConfiguration();
@@ -97,6 +100,7 @@ void
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
// move the container to DELETING
ContainerManager containerManager =
cluster.getStorageContainerManager().getContainerManager();
+
assertFalse(containerManager.getContainerReplicas(containerID).isEmpty());
containerManager.updateContainerState(containerID,
HddsProtos.LifeCycleEvent.DELETE);
assertEquals(HddsProtos.LifeCycleState.DELETING,
containerManager.getContainer(containerID).getState());
@@ -106,12 +110,22 @@ void
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
assertEquals(HddsProtos.LifeCycleState.DELETED,
containerManager.getContainer(containerID).getState());
}
- // restart a DN and wait for the container to get CLOSED.
- HddsDatanodeService dn =
cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode());
- cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
- waitForContainerStateInSCM(cluster.getStorageContainerManager(),
containerID, HddsProtos.LifeCycleState.CLOSED);
+ // restart all the DNs
+ List<DatanodeDetails> dnlist = keyLocation.getPipeline().getNodes();
+ for (DatanodeDetails dn: dnlist) {
+ cluster.restartHddsDatanode(dn, false);
+ }
- assertEquals(HddsProtos.LifeCycleState.CLOSED,
containerManager.getContainer(containerID).getState());
+ // The replica state is CLOSED, the container is DELETED/DELETING in
SCM,
+ // and the replica and container have the same bcsid, so SCM triggers
replica deletion.
+ // Wait for all replicas to be deleted.
+ GenericTestUtils.waitFor(() -> {
+ try {
+ return
containerManager.getContainerReplicas(containerID).isEmpty();
+ } catch (ContainerNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ }, 100, 180000);
}
} finally {
if (clusterPath != null) {
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
index d9337ef716b..fc2a6c9ec63 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
@@ -25,6 +25,7 @@
import static
org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -37,11 +38,12 @@
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
+import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
-import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
import org.apache.hadoop.ozone.TestDataUtil;
@@ -53,6 +55,7 @@
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
+import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
@@ -65,16 +68,16 @@ public class TestContainerReportHandlingWithHA {
private static final String KEY = "key1";
/**
- * Tests that a DELETING (or DELETED) container moves to the CLOSED state if
a non-empty replica is reported.
+ * Tests that a DELETING (or DELETED) container replica gets deleted when
replica bcsid <= container bcsid
* To do this, the test first creates a key and closes its corresponding
container. Then it moves that container to
- * DELETING (or DELETED) state using ContainerManager. Then it restarts a
Datanode hosting that container,
+ * DELETING (or DELETED) state using ContainerManager. Then it restarts
Datanodes hosting that container,
* making it send a full container report.
- * Finally, the test waits for the container to move from DELETING (or
DELETED) to CLOSED in all SCMs.
+ * Finally, the test waits for every replica of the DELETING (or DELETED)
container to be deleted, because replica bcsid <= container bcsid.
*/
@ParameterizedTest
@EnumSource(value = HddsProtos.LifeCycleState.class,
names = {"DELETING", "DELETED"})
- void
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsReportedWithScmHA(
+ void testDeletingOrDeletedContainerWhenNonEmptyReplicaIsReportedWithScmHA(
HddsProtos.LifeCycleState desiredState)
throws Exception {
OzoneConfiguration conf = new OzoneConfiguration();
@@ -100,6 +103,7 @@ void
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
// move the container to DELETING
ContainerManager containerManager =
cluster.getScmLeader().getContainerManager();
+
assertFalse(containerManager.getContainerReplicas(containerID).isEmpty());
containerManager.updateContainerState(containerID,
HddsProtos.LifeCycleEvent.DELETE);
assertEquals(HddsProtos.LifeCycleState.DELETING,
containerManager.getContainer(containerID).getState());
@@ -109,13 +113,22 @@ void
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
assertEquals(HddsProtos.LifeCycleState.DELETED,
containerManager.getContainer(containerID).getState());
}
- // restart a DN and wait for the container to get CLOSED in all SCMs
- HddsDatanodeService dn =
cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode());
- cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
-
- waitForContainerStateInAllSCMs(cluster, containerID,
HddsProtos.LifeCycleState.CLOSED);
+ // restart all the DNs
+ List<DatanodeDetails> dnlist = keyLocation.getPipeline().getNodes();
+ for (DatanodeDetails dn: dnlist) {
+ cluster.restartHddsDatanode(dn, false);
+ }
- assertEquals(HddsProtos.LifeCycleState.CLOSED,
containerManager.getContainer(containerID).getState());
+ // The replica state is CLOSED, the container is DELETED/DELETING in
SCM,
+ // and the replica and container have the same bcsid, so SCM triggers
replica deletion.
+ // Wait for all replicas to be deleted.
+ GenericTestUtils.waitFor(() -> {
+ try {
+ return
containerManager.getContainerReplicas(containerID).isEmpty();
+ } catch (ContainerNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ }, 100, 180000);
}
} finally {
if (clusterPath != null) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]