This is an automated email from the ASF dual-hosted git repository.
szetszwo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 8d9b50130a6 HDDS-14371.
ContainerManagerImpl.getOpenContainerCountPerPipeline could have division by
zero (#9621)
8d9b50130a6 is described below
commit 8d9b50130a677876a77fcaafdb32b8dc2fa10f1c
Author: Russole <[email protected]>
AuthorDate: Tue Jan 13 06:44:01 2026 +0800
HDDS-14371. ContainerManagerImpl.getOpenContainerCountPerPipeline could
have division by zero (#9621)
---
.../hdds/scm/container/ContainerManagerImpl.java | 16 +-------
.../apache/hadoop/hdds/scm/node/NodeManager.java | 6 +--
.../hadoop/hdds/scm/node/SCMNodeManager.java | 47 ++++++++++------------
.../hadoop/hdds/scm/pipeline/PipelineManager.java | 6 +--
.../hdds/scm/pipeline/PipelineManagerImpl.java | 9 +----
.../hadoop/hdds/scm/block/TestBlockManager.java | 35 ++++++++++------
.../hadoop/hdds/scm/container/MockNodeManager.java | 10 +----
.../hdds/scm/container/SimpleMockNodeManager.java | 9 +----
.../hadoop/hdds/scm/node/TestSCMNodeManager.java | 12 ++++--
.../hdds/scm/pipeline/MockPipelineManager.java | 16 +++-----
10 files changed, 69 insertions(+), 97 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java
index c93383e40db..6e5859f27b1 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerManagerImpl.java
@@ -76,8 +76,6 @@ public class ContainerManagerImpl implements ContainerManager
{
// Metrics related to operations should be moved to ProtocolServer
private final SCMContainerManagerMetrics scmContainerManagerMetrics;
- private final int numContainerPerVolume;
-
@SuppressWarnings("java:S2245") // no need for secure random
private final Random random = new Random();
@@ -108,10 +106,6 @@ public ContainerManagerImpl(
.setContainerReplicaPendingOps(containerReplicaPendingOps)
.build();
- this.numContainerPerVolume = conf
- .getInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT,
- ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT);
-
maxContainerSize = (long)
conf.getStorageSize(ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE,
ScmConfigKeys.OZONE_SCM_CONTAINER_SIZE_DEFAULT, StorageUnit.BYTES);
@@ -355,7 +349,7 @@ public ContainerInfo getMatchingContainer(final long size,
final String owner,
try {
synchronized (pipeline.getId()) {
containerIDs = getContainersForOwner(pipeline, owner);
- if (containerIDs.size() < getOpenContainerCountPerPipeline(pipeline)) {
+ if (containerIDs.size() <
pipelineManager.openContainerLimit(pipeline.getNodes())) {
if (pipelineManager.hasEnoughSpace(pipeline, maxContainerSize)) {
allocateContainer(pipeline, owner);
containerIDs = getContainersForOwner(pipeline, owner);
@@ -383,14 +377,6 @@ public ContainerInfo getMatchingContainer(final long size,
final String owner,
}
}
- private int getOpenContainerCountPerPipeline(Pipeline pipeline) {
- int minContainerCountPerDn = numContainerPerVolume *
- pipelineManager.minHealthyVolumeNum(pipeline);
- int minPipelineCountPerDn = pipelineManager.minPipelineLimit(pipeline);
- return (int) Math.ceil(
- ((double) minContainerCountPerDn / minPipelineCountPerDn));
- }
-
/**
* Returns the container ID's matching with specified owner.
* @param pipeline
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java
index 0d8b3a8362e..e9a019945c1 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeManager.java
@@ -393,14 +393,10 @@ Map<SCMCommandProto.Type, Integer>
getTotalDatanodeCommandCounts(
*/
NetworkTopology getClusterNetworkTopologyMap();
- int minHealthyVolumeNum(List <DatanodeDetails> dnList);
-
int totalHealthyVolumeCount();
int pipelineLimit(DatanodeDetails dn);
- int minPipelineLimit(List<DatanodeDetails> dn);
-
/**
* Gets the peers in all the pipelines for the particular datanode.
* @param dn datanode
@@ -424,4 +420,6 @@ default void forceNodesToHealthyReadOnly() { }
default void removeNode(DatanodeDetails datanodeDetails) throws
NodeNotFoundException, IOException {
}
+
+ int openContainerLimit(List<DatanodeDetails> datanodes);
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
index c3ef2238769..26d9e49bafc 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java
@@ -140,6 +140,7 @@ public class SCMNodeManager implements NodeManager {
private final Map<SCMCommandProto.Type,
BiConsumer<DatanodeDetails, SCMCommand<?>>> sendCommandNotifyMap;
private final NonWritableNodeFilter nonWritableNodeFilter;
+ private final int numContainerPerVolume;
/**
* Lock used to synchronize some operation in Node manager to ensure a
@@ -196,6 +197,9 @@ public SCMNodeManager(
ScmConfigKeys.OZONE_SCM_PIPELINE_PER_METADATA_VOLUME_DEFAULT);
String dnLimit = conf.get(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT);
this.heavyNodeCriteria = dnLimit == null ? 0 : Integer.parseInt(dnLimit);
+ this.numContainerPerVolume = conf.getInt(
+ ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT,
+ ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT);
this.scmContext = scmContext;
this.sendCommandNotifyMap = new HashMap<>();
this.nonWritableNodeFilter = new NonWritableNodeFilter(conf);
@@ -1561,24 +1565,21 @@ public String getLabel() {
}
}
- /**
- * Returns the min of no healthy volumes reported out of the set
- * of datanodes constituting the pipeline.
- */
@Override
- public int minHealthyVolumeNum(List<DatanodeDetails> dnList) {
- List<Integer> volumeCountList = new ArrayList<>(dnList.size());
- for (DatanodeDetails dn : dnList) {
- try {
- volumeCountList.add(nodeStateManager.getNode(dn).
- getHealthyVolumeCount());
- } catch (NodeNotFoundException e) {
- LOG.warn("Cannot generate NodeStat, datanode {} not found.",
- dn.getID());
+ public int openContainerLimit(List<DatanodeDetails> datanodes) {
+ int min = Integer.MAX_VALUE;
+ for (DatanodeDetails dn : datanodes) {
+ final int pipelineLimit = pipelineLimit(dn);
+ if (pipelineLimit <= 0) {
+ return 0;
+ }
+
+ final int containerLimit = 1 + (numContainerPerVolume *
getHealthyVolumeCount(dn) - 1) / pipelineLimit;
+ if (containerLimit < min) {
+ min = containerLimit;
}
}
- Preconditions.checkArgument(!volumeCountList.isEmpty());
- return Collections.min(volumeCountList);
+ return min;
}
@Override
@@ -1614,17 +1615,13 @@ public int pipelineLimit(DatanodeDetails dn) {
return 0;
}
- /**
- * Returns the pipeline limit for set of datanodes.
- */
- @Override
- public int minPipelineLimit(List<DatanodeDetails> dnList) {
- List<Integer> pipelineCountList = new ArrayList<>(dnList.size());
- for (DatanodeDetails dn : dnList) {
- pipelineCountList.add(pipelineLimit(dn));
+ private int getHealthyVolumeCount(DatanodeDetails dn) {
+ try {
+ return nodeStateManager.getNode(dn).getHealthyVolumeCount();
+ } catch (NodeNotFoundException e) {
+ LOG.warn("Failed to getHealthyVolumeCount, datanode {} not found.",
dn.getID());
+ return 0;
}
- Preconditions.checkArgument(!pipelineCountList.isEmpty());
- return Collections.min(pipelineCountList);
}
@Override
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
index 06954b64222..b4fe1559fd4 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java
@@ -125,10 +125,6 @@ void addContainerToPipeline(PipelineID pipelineID,
ContainerID containerID)
void incNumBlocksAllocatedMetric(PipelineID id);
- int minHealthyVolumeNum(Pipeline pipeline);
-
- int minPipelineLimit(Pipeline pipeline);
-
/**
* Activates a dormant pipeline.
*
@@ -224,4 +220,6 @@ void reinitialize(Table<PipelineID, Pipeline> pipelineStore)
* containerSize, otherwise true
*/
boolean hasEnoughSpace(Pipeline pipeline, long containerSize);
+
+ int openContainerLimit(List<DatanodeDetails> datanodes);
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java
index d0d261b21fd..f53275c65b5 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManagerImpl.java
@@ -671,13 +671,8 @@ public void incNumBlocksAllocatedMetric(PipelineID id) {
}
@Override
- public int minHealthyVolumeNum(Pipeline pipeline) {
- return nodeManager.minHealthyVolumeNum(pipeline.getNodes());
- }
-
- @Override
- public int minPipelineLimit(Pipeline pipeline) {
- return nodeManager.minPipelineLimit(pipeline.getNodes());
+ public int openContainerLimit(List<DatanodeDetails> datanodes) {
+ return nodeManager.openContainerLimit(datanodes);
}
/**
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
index 6a85a3dcf32..45c947cb00a 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java
@@ -25,6 +25,7 @@
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.File;
import java.io.IOException;
@@ -304,12 +305,16 @@ void testBlockDistribution() throws Exception {
futureList.add(future);
}
CompletableFuture.allOf(futureList.toArray(new
CompletableFuture[0])).get();
-
+ Pipeline pipeline = pipelineManager.getPipelines(replicationConfig).get(0);
+ int expectedContainers =
pipelineManager.openContainerLimit(pipeline.getNodes());
assertEquals(1, pipelineManager.getPipelines(replicationConfig).size());
- assertEquals(numContainerPerOwnerInPipeline, allocatedBlockMap.size());
- assertEquals(numContainerPerOwnerInPipeline,
allocatedBlockMap.values().size());
+ assertEquals(expectedContainers, allocatedBlockMap.size());
+ assertEquals(expectedContainers, allocatedBlockMap.values().size());
+ int floor = threadCount / expectedContainers;
+ int ceil = (threadCount + expectedContainers - 1) / expectedContainers;
allocatedBlockMap.values().forEach(v -> {
- assertEquals(numContainerPerOwnerInPipeline, v.size());
+ int sz = v.size();
+ assertTrue(sz == floor || sz == ceil, "Unexpected blocks per container:
" + sz);
});
}
@@ -422,13 +427,11 @@ void testBlockDistributionWithMultipleRaftLogDisks()
throws Exception {
pipelineManager.getPipelines(replicationConfig).size());
Pipeline pipeline =
pipelineManager.getPipelines(replicationConfig).get(0);
- // the pipeline per raft log disk config is set to 1 by default
- int numContainers = (int)Math.ceil((double)
- (numContainerPerOwnerInPipeline *
- numContainerPerOwnerInPipeline) / numMetaDataVolumes);
- assertEquals(numContainers,
pipelineManager.getNumberOfContainers(pipeline.getId()));
- assertEquals(numContainers, allocatedBlockMap.size());
- assertEquals(numContainers, allocatedBlockMap.values().size());
+ int expectedContainers =
+ pipelineManager.openContainerLimit(pipeline.getNodes());
+ assertEquals(expectedContainers,
pipelineManager.getNumberOfContainers(pipeline.getId()));
+ assertEquals(expectedContainers, allocatedBlockMap.size());
+ assertEquals(expectedContainers, allocatedBlockMap.values().size());
}
@Test
@@ -526,7 +529,7 @@ public void testMultipleBlockAllocationWithClosedContainer()
} catch (IOException e) {
}
return verifyNumberOfContainersInPipelines(
- numContainerPerOwnerInPipeline);
+ expectedContainersPerPipeline());
}, 10, 1000);
// close all the containers in all the pipelines
@@ -551,7 +554,7 @@ public void testMultipleBlockAllocationWithClosedContainer()
} catch (IOException e) {
}
return verifyNumberOfContainersInPipelines(
- numContainerPerOwnerInPipeline);
+ expectedContainersPerPipeline());
}, 10, 1000);
}
@@ -584,4 +587,10 @@ public void onMessage(final CommandForDatanode command,
}
}
}
+
+ private int expectedContainersPerPipeline() {
+ Pipeline pipeline = pipelineManager.getPipelines(replicationConfig).get(0);
+
+ return pipelineManager.openContainerLimit(pipeline.getNodes());
+ }
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java
index 9ecf41a42e4..013f14b1650 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/MockNodeManager.java
@@ -912,11 +912,6 @@ public void setNetworkTopology(NetworkTopology topology) {
this.clusterMap = topology;
}
- @Override
- public int minHealthyVolumeNum(List<DatanodeDetails> dnList) {
- return numHealthyDisksPerDatanode;
- }
-
@Override
public int totalHealthyVolumeCount() {
return healthyNodes.size() * numHealthyDisksPerDatanode;
@@ -929,9 +924,8 @@ public int pipelineLimit(DatanodeDetails dn) {
}
@Override
- public int minPipelineLimit(List<DatanodeDetails> dn) {
- // by default 1 single node pipeline and 1 three node pipeline
- return numPipelinePerDatanode;
+ public int openContainerLimit(List<DatanodeDetails> datanodes) {
+ return 9;
}
@Override
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java
index e43c4b7d68b..4f0679470ea 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/SimpleMockNodeManager.java
@@ -364,11 +364,6 @@ public NetworkTopology getClusterNetworkTopologyMap() {
return null;
}
- @Override
- public int minHealthyVolumeNum(List<DatanodeDetails> dnList) {
- return 0;
- }
-
@Override
public int totalHealthyVolumeCount() {
return 0;
@@ -380,8 +375,8 @@ public int pipelineLimit(DatanodeDetails dn) {
}
@Override
- public int minPipelineLimit(List<DatanodeDetails> dn) {
- return 0;
+ public int openContainerLimit(List<DatanodeDetails> datanodes) {
+ return 9;
}
@Override
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
index 985283ddac2..84b548a4c04 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestSCMNodeManager.java
@@ -1487,7 +1487,7 @@ public void testScmCanHandleScale()
*/
@Test
public void testScmStatsFromNodeReport()
- throws IOException, InterruptedException, AuthenticationException {
+ throws IOException, InterruptedException, AuthenticationException,
NodeNotFoundException {
OzoneConfiguration conf = getConf();
conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 1000,
MILLISECONDS);
@@ -1518,7 +1518,10 @@ public void testScmStatsFromNodeReport()
assertEquals(capacity * nodeCount, (long)
nodeManager.getStats().getCapacity().get());
assertEquals(used * nodeCount, (long)
nodeManager.getStats().getScmUsed().get());
assertEquals(remaining * nodeCount, (long)
nodeManager.getStats().getRemaining().get());
- assertEquals(1, nodeManager.minHealthyVolumeNum(dnList));
+ for (DatanodeDetails dn : dnList) {
+ assertEquals(1,
+
nodeManager.getNodeStateManager().getNode(dn).getHealthyVolumeCount());
+ }
dnList.clear();
}
}
@@ -1576,7 +1579,7 @@ public void testCalculateStoragePercentage(long
perCapacity,
*/
@Test
public void tesVolumeInfoFromNodeReport()
- throws IOException, InterruptedException, AuthenticationException {
+ throws IOException, InterruptedException, AuthenticationException,
NodeNotFoundException {
OzoneConfiguration conf = getConf();
conf.setTimeDuration(OZONE_SCM_HEARTBEAT_PROCESS_INTERVAL, 1000,
MILLISECONDS);
@@ -1606,7 +1609,8 @@ public void tesVolumeInfoFromNodeReport()
eventQueue.processAll(8000L);
assertEquals(1, nodeManager.getNodeCount(NodeStatus.inServiceHealthy()));
- assertEquals(volumeCount / 2, nodeManager.minHealthyVolumeNum(dnList));
+ assertEquals(volumeCount / 2,
+
nodeManager.getNodeStateManager().getNode(dn).getHealthyVolumeCount());
dnList.clear();
}
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockPipelineManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockPipelineManager.java
index cdbb7999d3a..04a1dc1bd4e 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockPipelineManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/MockPipelineManager.java
@@ -267,16 +267,6 @@ public void incNumBlocksAllocatedMetric(final PipelineID
id) {
}
- @Override
- public int minHealthyVolumeNum(Pipeline pipeline) {
- return 0;
- }
-
- @Override
- public int minPipelineLimit(Pipeline pipeline) {
- return 0;
- }
-
@Override
public void activatePipeline(final PipelineID pipelineID) {
}
@@ -345,4 +335,10 @@ public boolean isPipelineCreationFrozen() {
public boolean hasEnoughSpace(Pipeline pipeline, long containerSize) {
return false;
}
+
+ @Override
+ public int openContainerLimit(List<DatanodeDetails> datanodes) {
+ // For tests that do not care about this limit, return a large value.
+ return Integer.MAX_VALUE;
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]