This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 0fda8b28859 HDDS-13544. DN Decommission Fails When Other Datanodes Are 
Offline Due to Invalid Affinity Node in Ratis Replication (#8934)
0fda8b28859 is described below

commit 0fda8b28859855090fd27665edf92d36e84b7f70
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Tue Sep 30 00:57:03 2025 +0530

    HDDS-13544. DN Decommission Fails When Other Datanodes Are Offline Due to 
Invalid Affinity Node in Ratis Replication (#8934)
---
 .../replication/ReplicationManagerUtil.java        |  25 ++++-
 .../replication/TestReplicationManagerUtil.java    |  12 +++
 .../TestReplicationManagerIntegration.java         | 108 ++++++++++++++++++++-
 3 files changed, 139 insertions(+), 6 deletions(-)

diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
index 8b2ad758242..b08bdafd788 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
@@ -55,6 +55,15 @@ public final class ReplicationManagerUtil {
   private ReplicationManagerUtil() {
   }
 
+  private static String formatDatanodeDetails(List<DatanodeDetails> dns) {
+    if (dns == null) {
+      return "[]";
+    }
+    return dns.stream()
+        .map(dn -> String.format("%s[%s]", dn, dn.getPersistedOpState()))
+        .collect(Collectors.toList()).toString();
+  }
+
   /**
    * Using the passed placement policy attempt to select a list of datanodes to
    * use as new targets. If the placement policy is unable to select enough
@@ -102,8 +111,9 @@ public static List<DatanodeDetails> 
getTargetDatanodes(PlacementPolicy policy,
       }
     }
     throw new SCMException(String.format("Placement Policy: %s did not return"
-            + " any nodes. Number of required Nodes %d, Data size Required: 
%d",
-        policy.getClass(), requiredNodes, dataSizeRequired),
+            + " any nodes. Number of required Nodes %d, Data size Required: 
%d. Container: %s, Used Nodes %s, " +
+            "Excluded Nodes: %s.", policy.getClass(), requiredNodes, 
dataSizeRequired, container,
+        formatDatanodeDetails(usedNodes), 
formatDatanodeDetails(excludedNodes)),
         SCMException.ResultCodes.FAILED_TO_FIND_SUITABLE_NODE);
   }
 
@@ -178,6 +188,17 @@ An UNHEALTHY replica with unique origin node id of a 
QUASI_CLOSED container shou
           excludedNodes.add(r.getDatanodeDetails());
           continue;
         }
+        if (nodeStatus.isMaintenance() && nodeStatus.isDead()) {
+          // Dead maintenance nodes are removed from the network topology, so 
the topology logic can't find
+          // out their location and hence can't consider them for figuring out 
rack placement. So, we don't add them
+          // to the used nodes list. We also don't add them to excluded nodes, 
as the placement policy logic won't
+          // consider a node that's not in the topology anyway. In fact, 
adding it to excluded nodes will cause a
+          // problem if total nodes (in topology) + required nodes becomes 
less than excluded + used nodes.
+
+          // TODO: In the future, can the policy logic be changed to use the 
DatanodeDetails network location to figure
+          //  out  the rack?
+          continue;
+        }
       } catch (NodeNotFoundException e) {
         LOG.warn("Node {} not found in node manager.", r.getDatanodeDetails());
         // This should not happen, but if it does, just add the node to the
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerUtil.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerUtil.java
index a7718bdf24c..a3573dc2e47 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerUtil.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerUtil.java
@@ -25,6 +25,7 @@
 import static 
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.mockito.Mockito.any;
 import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
@@ -97,6 +98,12 @@ public void testGetExcludedAndUsedNodes() throws 
NodeNotFoundException {
             IN_MAINTENANCE, ContainerReplicaProto.State.CLOSED, 1);
     replicas.add(maintenance);
 
+    // dead maintenance node should neither be on the used list nor on the 
excluded list
+    ContainerReplica deadMaintenanceReplica = createContainerReplica(cid, 0,
+        IN_MAINTENANCE, ContainerReplicaProto.State.CLOSED, 1);
+    DatanodeDetails deadMaintenanceNode = 
deadMaintenanceReplica.getDatanodeDetails();
+    replicas.add(deadMaintenanceReplica);
+
     // Take one of the replicas and set it to be removed. It should be on the
     // excluded list rather than the used list.
     Set<ContainerReplica> toBeRemoved = new HashSet<>();
@@ -115,6 +122,9 @@ public void testGetExcludedAndUsedNodes() throws 
NodeNotFoundException {
     when(replicationManager.getNodeStatus(any())).thenAnswer(
         invocation -> {
           final DatanodeDetails dn = invocation.getArgument(0);
+          if (dn.equals(deadMaintenanceNode)) {
+            return NodeStatus.valueOf(dn.getPersistedOpState(), 
HddsProtos.NodeState.DEAD);
+          }
           for (ContainerReplica r : replicas) {
             if (r.getDatanodeDetails().equals(dn)) {
               return NodeStatus.valueOf(
@@ -137,6 +147,7 @@ public void testGetExcludedAndUsedNodes() throws 
NodeNotFoundException {
         .contains(maintenance.getDatanodeDetails());
     assertThat(excludedAndUsedNodes.getUsedNodes())
         .contains(pendingAdd);
+    
assertFalse(excludedAndUsedNodes.getUsedNodes().contains(deadMaintenanceNode));
 
     assertEquals(4, excludedAndUsedNodes.getExcludedNodes().size());
     assertThat(excludedAndUsedNodes.getExcludedNodes())
@@ -147,6 +158,7 @@ public void testGetExcludedAndUsedNodes() throws 
NodeNotFoundException {
         .contains(remove.getDatanodeDetails());
     assertThat(excludedAndUsedNodes.getExcludedNodes())
         .contains(pendingDelete);
+    
assertFalse(excludedAndUsedNodes.getExcludedNodes().contains(deadMaintenanceNode));
   }
 
   @Test
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerIntegration.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerIntegration.java
index 8556a01e533..21abf3f7772 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerIntegration.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerIntegration.java
@@ -25,6 +25,7 @@
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL;
 import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL;
 import static 
org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT;
+import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED;
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
 import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD;
@@ -40,6 +41,7 @@
 import static 
org.apache.hadoop.hdds.scm.node.TestNodeUtil.waitForDnToReachPersistedOpState;
 import static 
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SCM_CLOSE_CONTAINER_WAIT_DURATION;
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.nio.charset.StandardCharsets;
 import java.time.Duration;
@@ -47,6 +49,7 @@
 import java.util.List;
 import java.util.Set;
 import java.util.UUID;
+import java.util.stream.Collectors;
 import org.apache.hadoop.hdds.client.RatisReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
@@ -59,6 +62,7 @@
 import org.apache.hadoop.hdds.scm.container.ContainerManager;
 import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
 import 
org.apache.hadoop.hdds.scm.container.replication.ReplicationManager.ReplicationManagerConfiguration;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
 import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
@@ -71,8 +75,8 @@
 import org.apache.hadoop.ozone.client.OzoneKeyDetails;
 import org.apache.hadoop.ozone.client.OzoneKeyLocation;
 import org.apache.ozone.test.GenericTestUtils;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.MethodOrderer;
 import org.junit.jupiter.api.Order;
 import org.junit.jupiter.api.Test;
@@ -104,7 +108,7 @@ class TestReplicationManagerIntegration {
   private ContainerOperationClient scmClient;
   private OzoneBucket bucket;
 
-  @BeforeAll
+  @BeforeEach
   void init() throws Exception {
     OzoneConfiguration conf = new OzoneConfiguration();
 
@@ -152,7 +156,7 @@ void init() throws Exception {
     bucket = TestDataUtil.createVolumeAndBucket(client);
   }
 
-  @AfterAll
+  @AfterEach
   void shutdown() {
     IOUtils.close(LOG, client, scmClient, cluster);
   }
@@ -260,4 +264,100 @@ void 
testClosedContainerReplicationWhenNodeDecommissionAndBackToInService(
       }
     }, 200, 30000);
   }
+
+  /**
+   * A node containing a replica of a Ratis container is put into maintenance 
with no expiry. When it is in maintenance, it
+   * is shut down and the test waits until it's handled as dead. Then another 
node containing this container's replica is
+   * decommissioned. The expectation is that it should successfully 
decommission.
+   */
+  @Test
+  public void testDeadMaintenanceNodeAndDecommission() throws Exception {
+    String keyName = "key-" + UUID.randomUUID();
+    TestDataUtil.createKey(bucket, keyName, RATIS_REPLICATION_CONFIG,
+        "this is the content".getBytes(StandardCharsets.UTF_8));
+
+    OzoneKeyDetails key = bucket.getKey(keyName);
+    List<OzoneKeyLocation> keyLocations = key.getOzoneKeyLocations();
+
+    long iD = keyLocations.get(0).getContainerID();
+    ContainerID containerId = ContainerID.valueOf(iD);
+    ContainerInfo containerInfo = containerManager.getContainer(containerId);
+    OzoneTestUtils.closeContainer(scm, containerInfo);
+
+    assertEquals(HEALTHY_REPLICA_NUM,
+        containerManager.getContainerReplicas(containerId).size());
+
+    List<DatanodeDetails> dns = containerManager
+        .getContainerReplicas(containerId)
+        .stream().map(ContainerReplica::getDatanodeDetails)
+        .collect(Collectors.toList());
+
+    DatanodeDetails maintenanceDn = dns.get(0);
+    DatanodeDetails decomDn = dns.get(1);
+
+    
scmClient.startMaintenanceNodes(Collections.singletonList(getDNHostAndPort(maintenanceDn)),
 0, false);
+    waitForDnToReachOpState(nodeManager, maintenanceDn, IN_MAINTENANCE);
+    cluster.shutdownHddsDatanode(maintenanceDn);
+    waitForDnToReachHealthState(nodeManager, maintenanceDn, DEAD);
+
+    ContainerReplicaCount containerReplicaCount = 
replicationManager.getContainerReplicaCount(containerId);
+    assertTrue(containerReplicaCount.isSufficientlyReplicated());
+
+    
scmClient.decommissionNodes(Collections.singletonList(getDNHostAndPort(decomDn)),
 false);
+    waitForDnToReachOpState(nodeManager, decomDn, DECOMMISSIONED);
+
+    assertEquals(HEALTHY_REPLICA_NUM + 1, 
containerManager.getContainerReplicas(containerId).size());
+    ReplicationManagerReport report = new ReplicationManagerReport();
+    replicationManager.checkContainerStatus(containerInfo, report);
+    assertEquals(0, 
report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+    assertEquals(0, 
report.getStat(ReplicationManagerReport.HealthState.MIS_REPLICATED));
+    assertEquals(0, 
report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED));
+  }
+
+  @Test
+  public void 
testOneDeadMaintenanceNodeAndOneLiveMaintenanceNodeAndOneDecommissionNode() 
throws Exception {
+    String keyName = "key-" + UUID.randomUUID();
+    TestDataUtil.createKey(bucket, keyName, RATIS_REPLICATION_CONFIG,
+        "this is the content".getBytes(StandardCharsets.UTF_8));
+
+    OzoneKeyDetails key = bucket.getKey(keyName);
+    List<OzoneKeyLocation> keyLocations = key.getOzoneKeyLocations();
+
+    long iD = keyLocations.get(0).getContainerID();
+    ContainerID containerId = ContainerID.valueOf(iD);
+    ContainerInfo containerInfo = containerManager.getContainer(containerId);
+    OzoneTestUtils.closeContainer(scm, containerInfo);
+
+    assertEquals(HEALTHY_REPLICA_NUM,
+        containerManager.getContainerReplicas(containerId).size());
+
+    List<DatanodeDetails> dns = containerManager
+        .getContainerReplicas(containerId)
+        .stream().map(ContainerReplica::getDatanodeDetails)
+        .collect(Collectors.toList());
+
+    DatanodeDetails maintenanceDn = dns.get(0);
+    DatanodeDetails decomDn = dns.get(1);
+    DatanodeDetails secondMaintenanceDn = dns.get(2);
+
+    
scmClient.startMaintenanceNodes(Collections.singletonList(getDNHostAndPort(maintenanceDn)),
 0, false);
+    waitForDnToReachOpState(nodeManager, maintenanceDn, IN_MAINTENANCE);
+    cluster.shutdownHddsDatanode(maintenanceDn);
+    waitForDnToReachHealthState(nodeManager, maintenanceDn, DEAD);
+
+    ContainerReplicaCount containerReplicaCount = 
replicationManager.getContainerReplicaCount(containerId);
+    assertTrue(containerReplicaCount.isSufficientlyReplicated());
+
+    
scmClient.decommissionNodes(Collections.singletonList(getDNHostAndPort(decomDn)),
 false);
+    
scmClient.startMaintenanceNodes(Collections.singletonList(getDNHostAndPort(secondMaintenanceDn)),
 0, false);
+    waitForDnToReachOpState(nodeManager, decomDn, DECOMMISSIONED);
+    waitForDnToReachOpState(nodeManager, secondMaintenanceDn, IN_MAINTENANCE);
+
+    assertEquals(HEALTHY_REPLICA_NUM + 2, 
containerManager.getContainerReplicas(containerId).size());
+    ReplicationManagerReport report = new ReplicationManagerReport();
+    replicationManager.checkContainerStatus(containerInfo, report);
+    assertEquals(0, 
report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+    assertEquals(0, 
report.getStat(ReplicationManagerReport.HealthState.MIS_REPLICATED));
+    assertEquals(0, 
report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED));
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to