This is an automated email from the ASF dual-hosted git repository.

ashishkr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 9ccfb98f782 HDDS-14416. Handle delete ratis replica in DELETED/DELETING container scenario. (#9630)
9ccfb98f782 is described below

commit 9ccfb98f7822e37b9f690e32af6158e693dae04a
Author: Ashish Kumar <[email protected]>
AuthorDate: Fri Jan 23 09:44:11 2026 +0530

    HDDS-14416. Handle delete ratis replica in DELETED/DELETING container scenario. (#9630)
---
 .../container/AbstractContainerReportHandler.java  |   6 +
 .../scm/container/TestContainerReportHandler.java  |  23 ++-
 .../scm/container/TestContainerStateManager.java   | 204 ++++++++++++++++++++-
 .../container/TestContainerReportHandling.java     |  32 +++-
 .../TestContainerReportHandlingWithHA.java         |  35 ++--
 5 files changed, 267 insertions(+), 33 deletions(-)

diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java
index 35908afff87..f79507e1e66 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/AbstractContainerReportHandler.java
@@ -317,6 +317,12 @@ private boolean updateContainerState(final DatanodeDetails datanode,
     case DELETING:
       // HDDS-11136: If a DELETING container has a non-empty CLOSED replica, 
transition the container to CLOSED
       // HDDS-12421: If a DELETING or DELETED container has a non-empty 
replica, transition the container to CLOSED
+      if (replica.getState() == State.CLOSED && 
replica.getBlockCommitSequenceId() <= container.getSequenceId()
+          && 
container.getReplicationType().equals(HddsProtos.ReplicationType.RATIS)) {
+        deleteReplica(containerId, datanode, publisher, "DELETED", true, 
detailsForLogging);
+        // We should not move back to CLOSED state if replica bcsid <= 
container bcsid
+        return false;
+      }
       boolean replicaStateAllowed = (replica.getState() != State.INVALID && 
replica.getState() != State.DELETED);
       if (!replicaIsEmpty && replicaStateAllowed) {
         getLogger().info("transitionDeletingToClosed due to non-empty CLOSED 
replica (keyCount={}) for {}",
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java
index edd224fbdaa..0e8eee799bc 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerReportHandler.java
@@ -83,7 +83,6 @@
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.EnumSource;
 import org.junit.jupiter.params.provider.MethodSource;
-import org.junit.jupiter.params.provider.ValueSource;
 
 /**
  * Test the behaviour of the ContainerReportHandler.
@@ -204,6 +203,12 @@ static Stream<Arguments> containerAndReplicaStates() {
               replicaState.equals(ContainerReplicaProto.State.QUASI_CLOSED)) {
             continue;
           }
+          if (replicationType == HddsProtos.ReplicationType.RATIS &&
+              replicaState.equals(ContainerReplicaProto.State.CLOSED) &&
+              (containerState.equals(HddsProtos.LifeCycleState.DELETED) ||
+              containerState.equals(HddsProtos.LifeCycleState.DELETING))) {
+            continue;
+          }
           for (ContainerReplicaProto.State invalidState : 
invalidReplicaStates) {
             combinations.add(Arguments.of(replicationType, containerState, 
replicaState, invalidState));
           }
@@ -1142,9 +1147,8 @@ public void 
closedECContainerKeyAndBytesUsedUpdatedToMinimumOfAllReplicas()
         .getNumberOfKeys());
   }
 
-  @ParameterizedTest
-  @ValueSource(booleans = {false, true})
-  public void testStaleReplicaOfDeletedContainer(boolean isEmpty) throws 
NodeNotFoundException, IOException {
+  @Test
+  public void testStaleReplicaOfDeletedContainer() throws 
NodeNotFoundException, IOException {
     final ContainerReportHandler reportHandler = new 
ContainerReportHandler(nodeManager, containerManager);
 
     final Iterator<DatanodeDetails> nodeIterator = nodeManager.getNodes(
@@ -1161,18 +1165,13 @@ public void testStaleReplicaOfDeletedContainer(boolean 
isEmpty) throws NodeNotFo
 
     final ContainerReportsProto containerReport = getContainerReportsProto(
         containerOne.containerID(), ContainerReplicaProto.State.CLOSED,
-        datanodeOne.getUuidString(), 0, isEmpty);
+        datanodeOne.getUuidString(), 0, true);
     final ContainerReportFromDatanode containerReportFromDatanode =
         new ContainerReportFromDatanode(datanodeOne, containerReport);
     reportHandler.onMessage(containerReportFromDatanode, publisher);
 
-    if (isEmpty) {
-      // Expect the replica to be deleted when it is empty
-      verify(publisher, times(1)).fireEvent(any(), 
any(CommandForDatanode.class));
-    } else {
-      // Expect the replica to stay when it is NOT empty
-      verify(publisher, times(0)).fireEvent(any(), 
any(CommandForDatanode.class));
-    }
+    // Expect the replica to be deleted when it is empty
+    verify(publisher, times(1)).fireEvent(any(), 
any(CommandForDatanode.class));
     assertEquals(1, 
containerManager.getContainerReplicas(containerOne.containerID()).size());
   }
 
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java
index 799ea3fbcd5..869b7dd5bb9 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestContainerStateManager.java
@@ -17,11 +17,18 @@
 
 package org.apache.hadoop.hdds.scm.container;
 
+import static 
org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails;
+import static org.apache.hadoop.hdds.scm.HddsTestUtils.getContainer;
+import static org.apache.hadoop.hdds.scm.HddsTestUtils.getECContainer;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertInstanceOf;
 import static org.junit.jupiter.api.Assertions.fail;
+import static org.mockito.ArgumentMatchers.eq;
 import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.doAnswer;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
 import java.io.File;
@@ -32,6 +39,7 @@
 import java.util.Set;
 import java.util.concurrent.TimeoutException;
 import org.apache.hadoop.hdds.HddsConfigKeys;
+import org.apache.hadoop.hdds.client.ECReplicationConfig;
 import org.apache.hadoop.hdds.client.RatisReplicationConfig;
 import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@@ -39,23 +47,33 @@
 import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
+import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
 import 
org.apache.hadoop.hdds.scm.container.common.helpers.InvalidContainerStateException;
 import 
org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaPendingOps;
+import org.apache.hadoop.hdds.scm.events.SCMEvents;
+import org.apache.hadoop.hdds.scm.ha.SCMContext;
 import org.apache.hadoop.hdds.scm.ha.SCMHAManager;
 import org.apache.hadoop.hdds.scm.ha.SCMHAManagerStub;
 import org.apache.hadoop.hdds.scm.metadata.SCMDBDefinition;
+import org.apache.hadoop.hdds.scm.node.NodeStatus;
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
 import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
+import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher;
+import org.apache.hadoop.hdds.server.events.EventPublisher;
 import org.apache.hadoop.hdds.utils.db.DBStore;
 import org.apache.hadoop.hdds.utils.db.DBStoreBuilder;
+import 
org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException;
+import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
+import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.api.io.TempDir;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.EnumSource;
+import org.mockito.ArgumentCaptor;
 
 /**
  * Testing ContainerStatemanager.
@@ -67,9 +85,13 @@ public class TestContainerStateManager {
   private File testDir;
   private DBStore dbStore;
   private Pipeline pipeline;
+  private MockNodeManager nodeManager;
+  private ContainerManager containerManager;
+  private SCMContext scmContext;
+  private EventPublisher publisher;
 
   @BeforeEach
-  public void init() throws IOException, TimeoutException {
+  public void init() throws IOException, TimeoutException, 
InvalidStateTransitionException {
     OzoneConfiguration conf = new OzoneConfiguration();
     SCMHAManager scmhaManager = SCMHAManagerStub.getInstance(true);
     conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath());
@@ -95,6 +117,48 @@ public void init() throws IOException, TimeoutException {
             Clock.system(ZoneId.systemDefault()), null))
         .build();
 
+    nodeManager = new MockNodeManager(true, 10);
+    containerManager = mock(ContainerManager.class);
+    scmContext = SCMContext.emptyContext();
+    scmContext.updateLeaderAndTerm(true, 1L);
+    publisher = mock(EventPublisher.class);
+
+    when(containerManager.getContainer(any(ContainerID.class)))
+        .thenAnswer(invocation -> containerStateManager
+            .getContainer((ContainerID) invocation.getArguments()[0]));
+
+    when(containerManager.getContainerReplicas(any(ContainerID.class)))
+        .thenAnswer(invocation -> containerStateManager
+            .getContainerReplicas((ContainerID) invocation.getArguments()[0]));
+
+    doAnswer(invocation -> {
+      containerStateManager.updateContainerStateWithSequenceId(
+          ((ContainerID) invocation.getArguments()[0]).getProtobuf(),
+          (HddsProtos.LifeCycleEvent) invocation.getArguments()[1], 0L);
+      return null;
+    }).when(containerManager).updateContainerState(
+        any(ContainerID.class), any(HddsProtos.LifeCycleEvent.class));
+
+    doAnswer(invocation -> {
+      containerStateManager.updateContainerReplica(
+          (ContainerReplica) invocation.getArguments()[1]);
+      return null;
+    }).when(containerManager).updateContainerReplica(
+        any(ContainerID.class), any(ContainerReplica.class));
+
+    doAnswer(invocation -> {
+      containerStateManager.removeContainerReplica(
+          (ContainerReplica) invocation.getArguments()[1]);
+      return null;
+    }).when(containerManager).removeContainerReplica(
+        any(ContainerID.class), any(ContainerReplica.class));
+
+    doAnswer(invocation -> {
+      containerStateManager.transitionDeletingOrDeletedToClosedState(
+          ((ContainerID) invocation.getArgument(0)).getProtobuf());
+      return null;
+    
}).when(containerManager).transitionDeletingOrDeletedToClosedState(any(ContainerID.class));
+
   }
 
   @AfterEach
@@ -197,6 +261,115 @@ public void 
testTransitionContainerToClosedStateAllowOnlyDeletingOrDeletedContai
     }
   }
 
+  /**
+   * DELETED container + CLOSED replica with BCSID <= container seqId + RATIS 
replication.
+   * Expected: Force delete command should be sent (force=true)
+   */
+  @Test
+  public void testDeletedContainerWithStaleClosedReplicaRatis()
+      throws IOException {
+    final ContainerInfo container = 
getContainer(HddsProtos.LifeCycleState.DELETED);
+    containerStateManager.addContainer(container.getProtobuf());
+    assertEquals(HddsProtos.ReplicationType.RATIS, 
container.getReplicationType());
+    final DatanodeDetails datanode = nodeManager.getNodes(
+        NodeStatus.inServiceHealthy()).iterator().next();
+    // Report CLOSED replica with BCSID = 10000L (equal to container's seqId)
+    DeleteContainerCommand deleteCmd = sendReportAndCaptureDeleteCommand(
+        container, datanode, 10000L, false, 0, true);
+    // Verify force delete
+    verifyForceDeleteCommand(deleteCmd, container.containerID(), true,
+        "Delete command should have force=true for stale RATIS replica");
+    verifyContainerState(container.containerID(), 
HddsProtos.LifeCycleState.DELETED);
+  }
+
+  /**
+   * Test: DELETED container + CLOSED replica with BCSID < container seqId + 
RATIS.
+   * Expected: Force delete command should be sent (BCSID is lower)
+   */
+  @Test
+  public void testDeletedContainerWithLowerBcsidStaleReplicaRatis()
+      throws IOException {
+    final ContainerInfo container = 
getContainer(HddsProtos.LifeCycleState.DELETED);
+    containerStateManager.addContainer(container.getProtobuf());
+    final DatanodeDetails datanode = nodeManager.getNodes(
+        NodeStatus.inServiceHealthy()).iterator().next();
+    // Report CLOSED replica with BCSID = 9000L (lower than container's 10000L)
+    DeleteContainerCommand deleteCmd = sendReportAndCaptureDeleteCommand(
+        container, datanode, 9000L, false, 0, true);
+    verifyForceDeleteCommand(deleteCmd, container.containerID(), true,
+        "Delete command should have force=true for stale RATIS replica with 
lower BCSID");
+  }
+
+  /**
+   * DELETED EC container + CLOSED replica with BCSID <= container seqId.
+   * Expected: Should NOT send force delete
+   * Should transition to CLOSED instead
+   */
+  @Test
+  public void 
testDeletedECContainerWithStaleClosedReplicaShouldNotForceDelete()
+      throws IOException {
+    final DatanodeDetails datanode = randomDatanodeDetails();
+    nodeManager.register(datanode, null, null);
+    // Create a DELETED EC container
+    ECReplicationConfig repConfig = new ECReplicationConfig(3, 2);
+    final ContainerInfo ecContainer = getECContainer(
+        HddsProtos.LifeCycleState.DELETED,
+        PipelineID.randomId(),
+        repConfig);
+    containerStateManager.addContainer(ecContainer.getProtobuf());
+    assertEquals(HddsProtos.ReplicationType.EC, 
ecContainer.getReplicationType());
+    // Report CLOSED replica with BCSID = container's seqId
+    sendReportAndCaptureDeleteCommand(ecContainer, datanode,
+        ecContainer.getSequenceId(), false, 1, false);
+    // Container should transition to CLOSED
+    verifyContainerState(ecContainer.containerID(), 
HddsProtos.LifeCycleState.CLOSED);
+  }
+
+  private DeleteContainerCommand sendReportAndCaptureDeleteCommand(
+      ContainerInfo container, DatanodeDetails datanode,
+      long bcsId, boolean isEmpty, int replicaIndex, boolean reqCommandSend) {
+    final ContainerReportHandler reportHandler =
+        new ContainerReportHandler(nodeManager, containerManager, scmContext, 
null);
+    final StorageContainerDatanodeProtocolProtos.ContainerReportsProto 
containerReport =
+        getContainerReportsProto(container.containerID(),
+            ContainerReplicaProto.State.CLOSED,
+            datanode.getUuidString(),
+            bcsId,
+            isEmpty,
+            replicaIndex);
+    final SCMDatanodeHeartbeatDispatcher.ContainerReportFromDatanode 
reportFromDatanode =
+        new 
SCMDatanodeHeartbeatDispatcher.ContainerReportFromDatanode(datanode, 
containerReport);
+    reportHandler.onMessage(reportFromDatanode, publisher);
+    // Capture the delete command
+    ArgumentCaptor<CommandForDatanode<?>> commandCaptor =
+        ArgumentCaptor.forClass(CommandForDatanode.class);
+    if (reqCommandSend) {
+      verify(publisher, times(1))
+          .fireEvent(eq(SCMEvents.DATANODE_COMMAND), commandCaptor.capture());
+      CommandForDatanode<?> capturedCommand = commandCaptor.getValue();
+      assertEquals(DeleteContainerCommand.class, 
capturedCommand.getCommand().getClass());
+      return (DeleteContainerCommand) capturedCommand.getCommand();
+    } else {
+      verify(publisher, times(0))
+          .fireEvent(eq(SCMEvents.DATANODE_COMMAND), commandCaptor.capture());
+      return null;
+    }
+  }
+
+  private void verifyForceDeleteCommand(DeleteContainerCommand deleteCmd,
+      ContainerID expectedContainerId, boolean expectedForce, String message) {
+    assertEquals(expectedForce, deleteCmd.isForce(), message);
+    assertEquals(expectedContainerId.getId(), deleteCmd.getContainerID());
+  }
+
+  /**
+   * Verifies the container is in the expected state.
+   */
+  private void verifyContainerState(ContainerID containerId,
+      HddsProtos.LifeCycleState expectedState) throws IOException {
+    assertEquals(expectedState, 
containerManager.getContainer(containerId).getState());
+  }
+
   @Test
   public void testSequenceIdOnStateUpdate() throws Exception {
     ContainerID containerID = ContainerID.valueOf(3L);
@@ -264,4 +437,33 @@ private ContainerInfo allocateContainer()
     return containerInfo;
   }
 
+  private static StorageContainerDatanodeProtocolProtos.ContainerReportsProto 
getContainerReportsProto(
+      final ContainerID containerId,
+      final ContainerReplicaProto.State state,
+      final String originNodeId,
+      final long bcsId,
+      final boolean isEmpty,
+      final int replicaIndex) {
+    final StorageContainerDatanodeProtocolProtos.ContainerReportsProto.Builder 
crBuilder =
+        
StorageContainerDatanodeProtocolProtos.ContainerReportsProto.newBuilder();
+    final ContainerReplicaProto replicaProto =
+        ContainerReplicaProto.newBuilder()
+            .setContainerID(containerId.getProtobuf().getId())
+            .setState(state)
+            .setOriginNodeId(originNodeId)
+            .setSize(5368709120L)
+            .setUsed(isEmpty ? 0L : 2000000000L)
+            .setKeyCount(isEmpty ? 0L : 100000000L)
+            .setReadCount(100000000L)
+            .setWriteCount(100000000L)
+            .setReadBytes(2000000000L)
+            .setWriteBytes(2000000000L)
+            .setBlockCommitSequenceId(bcsId)
+            .setDeleteTransactionId(0)
+            .setReplicaIndex(replicaIndex)
+            .setIsEmpty(isEmpty)
+            .build();
+    return crBuilder.addReports(replicaProto).build();
+  }
+
 }
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
index 5327fc78bf0..3617bd2c219 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
@@ -25,6 +25,7 @@
 import static 
org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -36,10 +37,11 @@
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.hdds.client.RatisReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerManager;
-import org.apache.hadoop.ozone.HddsDatanodeService;
+import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.ozone.TestDataUtil;
 import org.apache.hadoop.ozone.client.ObjectStore;
@@ -50,6 +52,7 @@
 import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
+import org.apache.ozone.test.GenericTestUtils;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.EnumSource;
 
@@ -62,16 +65,16 @@ public class TestContainerReportHandling {
   private static final String KEY = "key1";
 
   /**
-   * Tests that a DELETING (or DELETED) container moves to the CLOSED state if 
a non-empty replica is reported.
+   * Tests that a DELETING (or DELETED) container replica gets deleted when 
replica bcsid <= container bcsid
    * To do this, the test first creates a key and closes its corresponding 
container. Then it moves that container to
    * DELETING (or DELETED) state using ContainerManager. Then it restarts a 
Datanode hosting that container,
    * making it send a full container report.
-   * Finally, the test waits for the container to move from DELETING (or 
DELETED) to CLOSED.
+   * Tests wait for a DELETING (or DELETED) container replica gets deleted 
when replica bcsid <= container bcsid
    */
   @ParameterizedTest
   @EnumSource(value = HddsProtos.LifeCycleState.class,
       names = {"DELETING", "DELETED"})
-  void 
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsReported(
+  void testDeletingOrDeletedContainerWhenNonEmptyReplicaIsReported(
       HddsProtos.LifeCycleState desiredState)
       throws Exception {
     OzoneConfiguration conf = new OzoneConfiguration();
@@ -97,6 +100,7 @@ void 
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
 
         // move the container to DELETING
         ContainerManager containerManager = 
cluster.getStorageContainerManager().getContainerManager();
+        
assertFalse(containerManager.getContainerReplicas(containerID).isEmpty());
         containerManager.updateContainerState(containerID, 
HddsProtos.LifeCycleEvent.DELETE);
         assertEquals(HddsProtos.LifeCycleState.DELETING, 
containerManager.getContainer(containerID).getState());
 
@@ -106,12 +110,22 @@ void 
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
           assertEquals(HddsProtos.LifeCycleState.DELETED, 
containerManager.getContainer(containerID).getState());
         }
 
-        // restart a DN and wait for the container to get CLOSED.
-        HddsDatanodeService dn = 
cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode());
-        cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
-        waitForContainerStateInSCM(cluster.getStorageContainerManager(), 
containerID, HddsProtos.LifeCycleState.CLOSED);
+        // restart all the DNs
+        List<DatanodeDetails> dnlist = keyLocation.getPipeline().getNodes();
+        for (DatanodeDetails dn: dnlist) {
+          cluster.restartHddsDatanode(dn, false);
+        }
 
-        assertEquals(HddsProtos.LifeCycleState.CLOSED, 
containerManager.getContainer(containerID).getState());
+        // Since replica state is CLOSED and container is DELETED/DELETING in 
SCM
+        // also bcsid of replica and container is same, SCM will trigger 
delete replica
+        // wait for all replica to be deleted
+        GenericTestUtils.waitFor(() -> {
+          try {
+            return 
containerManager.getContainerReplicas(containerID).isEmpty();
+          } catch (ContainerNotFoundException e) {
+            throw new RuntimeException(e);
+          }
+        }, 100, 180000);
       }
     } finally {
       if (clusterPath != null) {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
index d9337ef716b..fc2a6c9ec63 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
@@ -25,6 +25,7 @@
 import static 
org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -37,11 +38,12 @@
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.hdds.client.RatisReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerManager;
+import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
 import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
-import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
 import org.apache.hadoop.ozone.TestDataUtil;
@@ -53,6 +55,7 @@
 import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
+import org.apache.ozone.test.GenericTestUtils;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.EnumSource;
 
@@ -65,16 +68,16 @@ public class TestContainerReportHandlingWithHA {
   private static final String KEY = "key1";
 
   /**
-   * Tests that a DELETING (or DELETED) container moves to the CLOSED state if 
a non-empty replica is reported.
+   * Tests that a DELETING (or DELETED) container replica gets deleted when 
replica bcsid <= container bcsid
    * To do this, the test first creates a key and closes its corresponding 
container. Then it moves that container to
-   * DELETING (or DELETED) state using ContainerManager. Then it restarts a 
Datanode hosting that container,
+   * DELETING (or DELETED) state using ContainerManager. Then it restarts 
Datanodes hosting that container,
    * making it send a full container report.
-   * Finally, the test waits for the container to move from DELETING (or 
DELETED) to CLOSED in all SCMs.
+   * Tests wait for a DELETING (or DELETED) container replica gets deleted 
when replica bcsid <= container bcsid
    */
   @ParameterizedTest
   @EnumSource(value = HddsProtos.LifeCycleState.class,
       names = {"DELETING", "DELETED"})
-  void 
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsReportedWithScmHA(
+  void testDeletingOrDeletedContainerWhenNonEmptyReplicaIsReportedWithScmHA(
       HddsProtos.LifeCycleState desiredState)
       throws Exception {
     OzoneConfiguration conf = new OzoneConfiguration();
@@ -100,6 +103,7 @@ void 
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
 
         // move the container to DELETING
         ContainerManager containerManager = 
cluster.getScmLeader().getContainerManager();
+        
assertFalse(containerManager.getContainerReplicas(containerID).isEmpty());
         containerManager.updateContainerState(containerID, 
HddsProtos.LifeCycleEvent.DELETE);
         assertEquals(HddsProtos.LifeCycleState.DELETING, 
containerManager.getContainer(containerID).getState());
 
@@ -109,13 +113,22 @@ void 
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
           assertEquals(HddsProtos.LifeCycleState.DELETED, 
containerManager.getContainer(containerID).getState());
         }
 
-        // restart a DN and wait for the container to get CLOSED in all SCMs
-        HddsDatanodeService dn = 
cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode());
-        cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
-
-        waitForContainerStateInAllSCMs(cluster, containerID, 
HddsProtos.LifeCycleState.CLOSED);
+        // restart all the DNs
+        List<DatanodeDetails> dnlist = keyLocation.getPipeline().getNodes();
+        for (DatanodeDetails dn: dnlist) {
+          cluster.restartHddsDatanode(dn, false);
+        }
 
-        assertEquals(HddsProtos.LifeCycleState.CLOSED, 
containerManager.getContainer(containerID).getState());
+        // Since replica state is CLOSED and container is DELETED/DELETING in 
SCM
+        // also bcsid of replica and container is same, SCM will trigger 
delete replica
+        // wait for all replica to be deleted
+        GenericTestUtils.waitFor(() -> {
+          try {
+            return 
containerManager.getContainerReplicas(containerID).isEmpty();
+          } catch (ContainerNotFoundException e) {
+            throw new RuntimeException(e);
+          }
+        }, 100, 180000);
       }
     } finally {
       if (clusterPath != null) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to