This is an automated email from the ASF dual-hosted git repository.
peterxcli pushed a commit to branch HDDS-5713
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/HDDS-5713 by this push:
new 8eb944a65a HDDS-13212. [DiskBalancer] Fix Inconsistent Health Check in
DiskBalancer Status for Specific Hosts (#8610)
8eb944a65a is described below
commit 8eb944a65a8d31a641d3a2a1a0d2deda4e277506
Author: Gargi Jaiswal <[email protected]>
AuthorDate: Sat Jun 14 15:01:45 2025 +0530
HDDS-13212. [DiskBalancer] Fix Inconsistent Health Check in DiskBalancer
Status for Specific Hosts (#8610)
---
.../hadoop/hdds/scm/node/DiskBalancerManager.java | 20 ++++++--
.../cli/datanode/DiskBalancerStatusSubcommand.java | 2 +-
.../hadoop/ozone/scm/node/TestDiskBalancer.java | 57 +++++++++++++++++++++-
3 files changed, 73 insertions(+), 6 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
index 6a74099413..6b377da6b7 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
@@ -39,6 +39,7 @@
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.scm.storage.DiskBalancerConfiguration;
import org.apache.hadoop.hdds.server.events.EventPublisher;
import org.apache.hadoop.ozone.protocol.commands.CommandForDatanode;
@@ -107,7 +108,21 @@ public List<HddsProtos.DatanodeDiskBalancerInfoProto>
getDiskBalancerStatus(
List<DatanodeDetails> filterDns = null;
if (hosts.isPresent() && !hosts.get().isEmpty()) {
filterDns = NodeUtils.mapHostnamesToDatanodes(nodeManager, hosts.get(),
- useHostnames);
+ useHostnames).stream()
+ .filter(dn -> {
+ try {
+ NodeStatus nodeStatus = nodeManager.getNodeStatus(dn);
+ if (nodeStatus != NodeStatus.inServiceHealthy()) {
+ LOG.warn("Datanode {} is not in optimal state for disk
balancing." +
+ " NodeStatus: {}", dn.getHostName(), nodeStatus);
+ return false;
+ }
+ return true;
+ } catch (NodeNotFoundException e) {
+ throw new RuntimeException(e);
+ }
+ })
+ .collect(Collectors.toList());
}
// Filter Running Status by default
@@ -120,8 +135,7 @@ public List<HddsProtos.DatanodeDiskBalancerInfoProto>
getDiskBalancerStatus(
.map(dn -> getInfoProto(dn, clientVersion))
.collect(Collectors.toList());
} else {
- return nodeManager.getNodes(IN_SERVICE,
- HddsProtos.NodeState.HEALTHY).stream()
+ return nodeManager.getNodes(NodeStatus.inServiceHealthy()).stream()
.filter(dn -> shouldReturnDatanode(filterStatus, dn))
.map(dn -> getInfoProto((DatanodeInfo)dn, clientVersion))
.collect(Collectors.toList());
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
index 5ae2b37965..d8d0113eb1 100644
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
+++
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
@@ -34,7 +34,7 @@
*/
@Command(
name = "status",
- description = "Get Datanode DiskBalancer Status",
+ description = "Get Datanode DiskBalancer Status for inServiceHealthy DNs",
mixinStandardHelpOptions = true,
versionProvider = HddsVersionProvider.class)
public class DiskBalancerStatusSubcommand extends ScmSubcommand {
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
index 13d361ad2f..f2c60d9899 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
@@ -17,11 +17,16 @@
package org.apache.hadoop.ozone.scm.node;
+import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING;
+import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
+import static org.apache.hadoop.hdds.scm.node.TestNodeUtil.getDNHostAndPort;
+import static
org.apache.hadoop.hdds.scm.node.TestNodeUtil.waitForDnToReachOpState;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
@@ -37,6 +42,8 @@
import
org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity;
import org.apache.hadoop.hdds.scm.node.DatanodeInfo;
import org.apache.hadoop.hdds.scm.node.DiskBalancerManager;
+import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.ozone.ClientVersion;
import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerService;
@@ -130,7 +137,53 @@ public void testDiskBalancerStopAfterEven() throws
IOException,
}
@Test
- public void testDatanodeDiskBalancerStatus() throws IOException {
- // TODO: Test status command with datanodes in balancing
+ public void testDatanodeDiskBalancerStatus() throws IOException,
InterruptedException, TimeoutException {
+ List<HddsDatanodeService> dns = cluster.getHddsDatanodes();
+ DatanodeDetails toDecommission = dns.get(0).getDatanodeDetails();
+
+ diskBalancerManager.startDiskBalancer(
+ Optional.of(10.0), // threshold
+ Optional.of(10L), // bandwidth in MB
+ Optional.of(5), // parallel threads
+ Optional.of(true), // stopAfterDiskEven
+ Optional.empty());
+
+ // All DNs are IN_SERVICE, so disk balancer status should be present for all of them
+ List<HddsProtos.DatanodeDiskBalancerInfoProto> statusProtoList =
+ diskBalancerManager.getDiskBalancerStatus(Optional.empty(),
+ Optional.empty(),
+ ClientVersion.CURRENT_VERSION);
+ assertEquals(3, statusProtoList.size());
+
+ NodeManager nm = cluster.getStorageContainerManager().getScmNodeManager();
+
+ // Decommission the first DN
+ storageClient.decommissionNodes(Arrays.asList(
+ getDNHostAndPort(toDecommission)), false);
+ waitForDnToReachOpState(nm, toDecommission, DECOMMISSIONING);
+
+ // One DN is in DECOMMISSIONING state, so disk balancer status for it
should not be present
+ statusProtoList =
diskBalancerManager.getDiskBalancerStatus(Optional.empty(),
+ Optional.empty(),
+ ClientVersion.CURRENT_VERSION);
+ assertEquals(2, statusProtoList.size());
+
+ // Check that status for the decommissioning DN is not present
+ statusProtoList = diskBalancerManager.getDiskBalancerStatus(
+
Optional.of(Collections.singletonList(getDNHostAndPort(toDecommission))),
+ Optional.empty(),
+ ClientVersion.CURRENT_VERSION);
+ assertEquals(0, statusProtoList.size());
+
+ storageClient.recommissionNodes(Arrays.asList(
+ getDNHostAndPort(toDecommission)));
+ waitForDnToReachOpState(nm, toDecommission, IN_SERVICE);
+
+ // Check status for the recommissioned DN should now be present
+ statusProtoList = diskBalancerManager.getDiskBalancerStatus(
+
Optional.of(Collections.singletonList(getDNHostAndPort(toDecommission))),
+ Optional.empty(),
+ ClientVersion.CURRENT_VERSION);
+ assertEquals(1, statusProtoList.size());
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]