This is an automated email from the ASF dual-hosted git repository.
sammichen pushed a commit to branch HDDS-5713
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/HDDS-5713 by this push:
new c00288af23d HDDS-13611. [DiskBalancer] Inconsistent VolumeDataDensity calculations between SCM and DN and incorrect EstBytesToMove (#8972)
c00288af23d is described below
commit c00288af23d39735b52d6782a627d3d9d4afed46
Author: Gargi Jaiswal <[email protected]>
AuthorDate: Fri Oct 10 13:43:53 2025 +0530
HDDS-13611. [DiskBalancer] Inconsistent VolumeDataDensity calculations between SCM and DN and incorrect EstBytesToMove (#8972)
---
.../container/common/volume/MutableVolumeSet.java | 13 ---
.../container/diskbalancer/DiskBalancerInfo.java | 6 +-
.../diskbalancer/DiskBalancerService.java | 73 +++++++-----
.../DiskBalancerVolumeCalculation.java | 122 +++++++++++++++++++++
.../policy/DefaultVolumeChoosingPolicy.java | 16 ++-
.../diskbalancer/TestDiskBalancerService.java | 5 +-
.../proto/ScmServerDatanodeHeartbeatProtocol.proto | 1 +
.../hadoop/hdds/scm/node/DiskBalancerManager.java | 53 ++-------
.../hadoop/hdds/scm/node/DiskBalancerStatus.java | 9 +-
.../hdds/scm/node/TestDiskBalancerManager.java | 7 ++
.../hadoop/ozone/scm/node/TestDiskBalancer.java | 27 +++++
11 files changed, 240 insertions(+), 92 deletions(-)
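
For context, the VolumeDataDensity metric this change makes consistent is the sum over a datanode's volumes of |volumeUtilization - idealUsage|, where idealUsage is the node-wide used/capacity ratio. Previously the SCM derived the value from storage reports while the datanode computed it over a live, mutable volume set; after this patch the datanode computes it once over an immutable snapshot and reports it in the heartbeat. A minimal standalone sketch of that arithmetic (plain arrays standing in for HddsVolume; illustrative only, not code from this patch):

    // Sketch: density = sum over volumes of |used_i/capacity_i - idealUsage|.
    final class DensitySketch {
      static double volumeDataDensity(long[] capacity, long[] used) {
        long totalCapacity = 0, totalUsed = 0;
        for (int i = 0; i < capacity.length; i++) {
          totalCapacity += capacity[i];
          totalUsed += used[i];
        }
        double idealUsage = (double) totalUsed / totalCapacity;
        double sum = 0.0;
        for (int i = 0; i < capacity.length; i++) {
          sum += Math.abs((double) used[i] / capacity[i] - idealUsage);
        }
        return sum;
      }

      public static void main(String[] args) {
        // Two 100 GiB volumes, one 80% full and one 20% full: ideal is 50%,
        // so the density sum is |0.8-0.5| + |0.2-0.5| = 0.6 (up to rounding).
        System.out.println(volumeDataDensity(
            new long[] {100L << 30, 100L << 30},
            new long[] {80L << 30, 20L << 30}));
      }
    }
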
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
index 8ce114cfd40..9a691fe92e0 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/MutableVolumeSet.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.ozone.container.common.volume;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.io.IOException;
@@ -34,7 +33,6 @@
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.fs.SpaceUsageCheckFactory;
-import org.apache.hadoop.hdds.fs.SpaceUsageSource;
import org.apache.hadoop.hdds.utils.HddsServerUtil;
import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
import org.apache.hadoop.ozone.container.common.impl.StorageLocationReport;
@@ -507,15 +505,4 @@ public StorageVolume.VolumeType getVolumeType() {
public VolumeHealthMetrics getVolumeHealthMetrics() {
return volumeHealthMetrics;
}
-
- public double getIdealUsage() {
- long totalCapacity = 0L, totalFree = 0L;
- for (StorageVolume volume: volumeMap.values()) {
- SpaceUsageSource usage = volume.getCurrentUsage();
- totalCapacity += usage.getCapacity();
- totalFree += usage.getAvailable();
- }
- Preconditions.checkArgument(totalCapacity != 0);
- return ((double) (totalCapacity - totalFree)) / totalCapacity;
- }
}
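
(Note that the getIdealUsage body removed here reappears essentially unchanged in the new DiskBalancerVolumeCalculation utility below, so the volume-choosing policy and the balancer service now compute ideal usage from the same immutable snapshot instead of iterating the live volumeMap.)
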
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerInfo.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerInfo.java
index 579d3db23f2..882c8c71016 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerInfo.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerInfo.java
@@ -37,6 +37,7 @@ public class DiskBalancerInfo {
private long failureCount;
private long bytesToMove;
private long balancedBytes;
+ private double volumeDataDensity;
public DiskBalancerInfo(DiskBalancerOperationalState operationalState, double threshold,
long bandwidthInMB, int parallelThread, boolean stopAfterDiskEven) {
@@ -57,7 +58,7 @@ public DiskBalancerInfo(DiskBalancerOperationalState operationalState, double th
@SuppressWarnings("checkstyle:ParameterNumber")
public DiskBalancerInfo(DiskBalancerOperationalState operationalState, double threshold,
long bandwidthInMB, int parallelThread, boolean stopAfterDiskEven, DiskBalancerVersion version,
- long successCount, long failureCount, long bytesToMove, long balancedBytes) {
+ long successCount, long failureCount, long bytesToMove, long balancedBytes, double volumeDataDensity) {
this.operationalState = operationalState;
this.threshold = threshold;
this.bandwidthInMB = bandwidthInMB;
@@ -68,6 +69,7 @@ public DiskBalancerInfo(DiskBalancerOperationalState operationalState, double th
this.failureCount = failureCount;
this.bytesToMove = bytesToMove;
this.balancedBytes = balancedBytes;
+ this.volumeDataDensity = volumeDataDensity;
}
public DiskBalancerInfo(boolean shouldRun,
@@ -112,6 +114,8 @@ public StorageContainerDatanodeProtocolProtos.DiskBalancerReportProto toDiskBala
builder.setFailureMoveCount(failureCount);
builder.setBytesToMove(bytesToMove);
builder.setBalancedBytes(balancedBytes);
+ builder.setVolumeDataDensity(volumeDataDensity);
+
return builder.build();
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
index 2eabf1ac236..aa9dd257678 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
@@ -21,6 +21,7 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
+import com.google.common.collect.ImmutableList;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
@@ -38,6 +39,7 @@
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.StorageUnit;
+import org.apache.hadoop.hdds.fs.SpaceUsageSource;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -121,7 +123,6 @@ public class DiskBalancerService extends BackgroundService {
private final File diskBalancerInfoFile;
private DiskBalancerServiceMetrics metrics;
- private long bytesToMove;
private long containerDefaultSize;
/**
@@ -401,8 +402,7 @@ public BackgroundTaskQueue getTasks() {
}
}
- if (queue.isEmpty()) {
- bytesToMove = 0;
+ if (queue.isEmpty() && inProgressContainers.isEmpty()) {
if (stopAfterDiskEven) {
LOG.info("Disk balancer is stopped due to disk even as" +
" the property StopAfterDiskEven is set to true.");
@@ -415,8 +415,6 @@ public BackgroundTaskQueue getTasks() {
}
}
metrics.incrIdleLoopNoAvailableVolumePairCount();
- } else {
- bytesToMove = calculateBytesToMove(volumeSet);
}
return queue;
@@ -620,43 +618,58 @@ private void postCall(boolean success, long startTime) {
}
public DiskBalancerInfo getDiskBalancerInfo() {
- return new DiskBalancerInfo(operationalState, threshold, bandwidthInMB,
- parallelThread, stopAfterDiskEven, version, metrics.getSuccessCount(),
- metrics.getFailureCount(), bytesToMove, metrics.getSuccessBytes());
- }
+ ImmutableList<HddsVolume> immutableVolumeSet = DiskBalancerVolumeCalculation.getImmutableVolumeSet(volumeSet);
- public long calculateBytesToMove(MutableVolumeSet inputVolumeSet) {
- long bytesPendingToMove = 0;
- long totalFreeSpace = 0;
- long totalCapacity = 0;
+ // Calculate volumeDataDensity
+ double volumeDataDensity =
+ DiskBalancerVolumeCalculation.calculateVolumeDataDensity(immutableVolumeSet, deltaSizes);
- for (HddsVolume volume : StorageVolumeUtil.getHddsVolumesList(inputVolumeSet.getVolumesList())) {
- totalFreeSpace += volume.getCurrentUsage().getAvailable();
- totalCapacity += volume.getCurrentUsage().getCapacity();
+ long bytesToMove = 0;
+ if (this.operationalState == DiskBalancerOperationalState.RUNNING) {
+ // Calculate bytesToMove from live volume state only while the balancer is running; otherwise report 0.
+ bytesToMove = calculateBytesToMove(immutableVolumeSet);
}
- if (totalCapacity == 0) {
+ return new DiskBalancerInfo(operationalState, threshold, bandwidthInMB,
+ parallelThread, stopAfterDiskEven, version, metrics.getSuccessCount(),
+ metrics.getFailureCount(), bytesToMove, metrics.getSuccessBytes(), volumeDataDensity);
+ }
+
+ public long calculateBytesToMove(ImmutableList<HddsVolume> inputVolumeSet) {
+ // With fewer than two volumes there is nothing to balance, so report 0 bytes to move.
+ if (inputVolumeSet.size() < 2) {
return 0;
}
- double datanodeUtilization = ((double) (totalCapacity - totalFreeSpace)) / totalCapacity;
+ // Calculate ideal usage
+ double idealUsage = DiskBalancerVolumeCalculation.getIdealUsage(inputVolumeSet);
+ double normalizedThreshold = threshold / 100.0;
- double thresholdFraction = threshold / 100.0;
- double upperLimit = datanodeUtilization + thresholdFraction;
+ long totalBytesToMove = 0;
// Calculate excess data in overused volumes
- for (HddsVolume volume : StorageVolumeUtil.getHddsVolumesList(inputVolumeSet.getVolumesList())) {
- long freeSpace = volume.getCurrentUsage().getAvailable();
- long capacity = volume.getCurrentUsage().getCapacity();
- double volumeUtilization = ((double) (capacity - freeSpace)) / capacity;
-
- // Consider only volumes exceeding the upper threshold
- if (volumeUtilization > upperLimit) {
- long excessData = (capacity - freeSpace) - (long) (upperLimit * capacity);
- bytesPendingToMove += excessData;
+ for (HddsVolume volume : inputVolumeSet) {
+ SpaceUsageSource usage = volume.getCurrentUsage();
+
+ if (usage.getCapacity() == 0) {
+ continue;
+ }
+
+ long deltaSize = deltaSizes.getOrDefault(volume, 0L);
+ double currentUsage = (double) ((usage.getCapacity() - usage.getAvailable())
+ + deltaSize + volume.getCommittedBytes()) / usage.getCapacity();
+
+ double volumeUtilization = currentUsage - idealUsage;
+
+ // Only consider volumes that exceed the threshold (source volumes)
+ if (volumeUtilization >= normalizedThreshold) {
+ // Calculate excess bytes that need to be moved from this volume
+ long excessBytes = (long) ((volumeUtilization - normalizedThreshold) * usage.getCapacity());
+ totalBytesToMove += Math.max(0, excessBytes);
}
}
- return bytesPendingToMove;
+ return totalBytesToMove;
}
private Path getDiskBalancerTmpDir(HddsVolume hddsVolume) {
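
The rewritten calculation above measures each volume's deviation from the node-wide ideal usage, folding in pending delta sizes and committed bytes, instead of comparing raw utilization against datanodeUtilization + threshold. A worked example of the per-volume arithmetic, using made-up numbers rather than values from this patch:

    // Sketch: excess bytes for one over-utilized volume (hypothetical values).
    final class BytesToMoveSketch {
      public static void main(String[] args) {
        long capacity = 100L << 30;        // 100 GiB volume
        long available = 20L << 30;        // 20 GiB free
        long deltaSize = 1L << 30;         // 1 GiB already scheduled onto it
        long committedBytes = 2L << 30;    // 2 GiB reserved for open containers
        double idealUsage = 0.50;          // node-wide used/capacity
        double normalizedThreshold = 0.10; // threshold given as 10%

        double currentUsage =
            (double) ((capacity - available) + deltaSize + committedBytes) / capacity; // 0.83
        double deviation = currentUsage - idealUsage; // 0.33
        long excessBytes = deviation >= normalizedThreshold
            ? (long) ((deviation - normalizedThreshold) * capacity) // ~23 GiB
            : 0L;
        System.out.println("excessBytes = " + excessBytes);
      }
    }
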
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerVolumeCalculation.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerVolumeCalculation.java
new file mode 100644
index 00000000000..6bfb168d1da
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerVolumeCalculation.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.container.diskbalancer;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import java.util.List;
+import java.util.Map;
+import org.apache.hadoop.hdds.fs.SpaceUsageSource;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
+import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
+import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class for disk balancer volume calculations.
+ *
+ * This class provides shared functionality for creating immutable volume
+ * snapshots and calculating ideal usage and volume data density, ensuring
+ * consistency across all disk-balancing components and preventing race conditions.
+ */
+public final class DiskBalancerVolumeCalculation {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(DiskBalancerVolumeCalculation.class);
+
+ private DiskBalancerVolumeCalculation() {
+ // Utility class - prevent instantiation
+ }
+
+ /**
+ * Get an immutable snapshot of volumes from a MutableVolumeSet.
+ *
+ * @param volumeSet The MutableVolumeSet to create a snapshot from
+ * @return Immutable list of HddsVolume objects
+ */
+ public static ImmutableList<HddsVolume> getImmutableVolumeSet(MutableVolumeSet volumeSet) {
+ // Create an immutable copy of the volume list at this point in time
+ List<HddsVolume> volumes =
+ StorageVolumeUtil.getHddsVolumesList(volumeSet.getVolumesList());
+ return ImmutableList.copyOf(volumes);
+ }
+
+ /**
+ * Get ideal usage from an immutable list of volumes.
+ *
+ * @param volumes Immutable list of volumes
+ * @return Ideal usage as a ratio (used space / total capacity)
+ * @throws IllegalArgumentException if total capacity is zero
+ */
+ public static double getIdealUsage(ImmutableList<HddsVolume> volumes) {
+ long totalCapacity = 0L, totalFree = 0L;
+
+ for (HddsVolume volume : volumes) {
+ SpaceUsageSource usage = volume.getCurrentUsage();
+ totalCapacity += usage.getCapacity();
+ totalFree += usage.getAvailable();
+ }
+
+ Preconditions.checkArgument(totalCapacity != 0);
+ return ((double) (totalCapacity - totalFree)) / totalCapacity;
+ }
+
+ /**
+ * Calculate VolumeDataDensity.
+ *
+ * @param volumeSet Immutable list of volumes
+ * @param deltaMap Map of volume to delta sizes (ongoing operations), can be null
+ * @return VolumeDataDensity sum across all volumes
+ */
+ public static double calculateVolumeDataDensity(ImmutableList<HddsVolume> volumeSet, Map<HddsVolume, Long> deltaMap) {
+ if (volumeSet == null) {
+ LOG.warn("VolumeSet is null, returning 0.0 for VolumeDataDensity");
+ return 0.0;
+ }
+
+ try {
+ // If there is only one volume, return 0.0 as there's no imbalance to measure
+ if (volumeSet.size() <= 1) {
+ return 0.0;
+ }
+
+ // Calculate ideal usage using the same immutable volume snapshot
+ double idealUsage = getIdealUsage(volumeSet);
+ double volumeDensitySum = 0.0;
+
+ // Calculate density for each volume using the same snapshot
+ for (HddsVolume volume : volumeSet) {
+ SpaceUsageSource usage = volume.getCurrentUsage();
+ Preconditions.checkArgument(usage.getCapacity() != 0);
+
+ long deltaSize = (deltaMap != null) ? deltaMap.getOrDefault(volume, 0L) : 0L;
+ double currentUsage = (double) ((usage.getCapacity() - usage.getAvailable())
+ + deltaSize + volume.getCommittedBytes()) / usage.getCapacity();
+
+ // Calculate density as absolute difference from ideal usage
+ double volumeDensity = Math.abs(currentUsage - idealUsage);
+ volumeDensitySum += volumeDensity;
+ }
+ return volumeDensitySum;
+ } catch (Exception e) {
+ LOG.error("Error calculating VolumeDataDensity", e);
+ return -1.0;
+ }
+ }
+}
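
Callers are expected to take one snapshot and reuse it for every derived value, as DiskBalancerService and DefaultVolumeChoosingPolicy do. A usage sketch, assuming a MutableVolumeSet volumeSet and a Map<HddsVolume, Long> deltaSizes are already in scope:

    // Sketch: one snapshot feeds every calculation, so density, ideal usage,
    // and bytes-to-move all observe the same volume state.
    ImmutableList<HddsVolume> snapshot =
        DiskBalancerVolumeCalculation.getImmutableVolumeSet(volumeSet);
    double idealUsage = DiskBalancerVolumeCalculation.getIdealUsage(snapshot);
    double density =
        DiskBalancerVolumeCalculation.calculateVolumeDataDensity(snapshot, deltaSizes);
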
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultVolumeChoosingPolicy.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultVolumeChoosingPolicy.java
index 20cd2aef0c4..021488987a7 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultVolumeChoosingPolicy.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultVolumeChoosingPolicy.java
@@ -17,16 +17,17 @@
package org.apache.hadoop.ozone.container.diskbalancer.policy;
+import com.google.common.collect.ImmutableList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.fs.SpaceUsageSource;
-import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.hadoop.ozone.container.common.volume.AvailableSpaceFilter;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
+import org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerVolumeCalculation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -52,12 +53,19 @@ public Pair<HddsVolume, HddsVolume> chooseVolume(MutableVolumeSet volumeSet,
double threshold, Map<HddsVolume, Long> deltaMap, long containerSize) {
lock.lock();
try {
- double idealUsage = volumeSet.getIdealUsage();
+ // Create truly immutable snapshot of volumes to ensure consistency
+ ImmutableList<HddsVolume> allVolumes =
+ DiskBalancerVolumeCalculation.getImmutableVolumeSet(volumeSet);
+
+ if (allVolumes.size() < 2) {
+ return null; // Can't balance with fewer than 2 volumes.
+ }
+
+ // Calculate ideal usage using the same immutable volume snapshot
+ double idealUsage = DiskBalancerVolumeCalculation.getIdealUsage(allVolumes);
// Threshold is given as a percentage
double normalizedThreshold = threshold / 100;
- List<HddsVolume> volumes = StorageVolumeUtil
- .getHddsVolumesList(volumeSet.getVolumesList())
+ List<HddsVolume> volumes = allVolumes
.stream()
.filter(volume -> {
SpaceUsageSource usage = volume.getCurrentUsage();
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
index e75b861e7c0..0d22a896062 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
@@ -27,6 +27,7 @@
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
+import com.google.common.collect.ImmutableList;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
@@ -251,8 +252,10 @@ public void testCalculateBytesToMove(int volumeCount, int deltaUsagePercent,
long expectedBytesToMove = (long) Math.ceil(
(totalCapacity * expectedBytesToMovePercent) / 100.0 * totalOverUtilisedVolumes);
+ ImmutableList<HddsVolume> immutableVolumes =
+ DiskBalancerVolumeCalculation.getImmutableVolumeSet(volumeSet);
+
// data precision loss due to double data involved in calculation
- assertTrue(Math.abs(expectedBytesToMove - svc.calculateBytesToMove(volumeSet)) <= 1);
+ assertTrue(Math.abs(expectedBytesToMove - svc.calculateBytesToMove(immutableVolumes)) <= 1);
}
@Test
diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
index 166309d4db6..991f51c5bd9 100644
--- a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
+++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
@@ -496,6 +496,7 @@ message DiskBalancerReportProto {
optional uint64 successMoveCount = 4;
optional uint64 failureMoveCount = 5;
optional uint64 bytesToMove = 6;
+ optional double volumeDataDensity = 7;
}
/**
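
Because the new field is optional, reports from datanodes that predate this change simply omit it, and the proto2-generated Java exposes a presence check. A sketch of defensive consumption (the Double.NaN fallback mirrors the sentinel the SCM side uses for UNKNOWN status):

    // Sketch: guard against reports from older datanodes that never set field 7.
    double volumeDataDensity = reportProto.hasVolumeDataDensity()
        ? reportProto.getVolumeDataDensity()
        : Double.NaN;
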
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
index 5e66d081a25..61006ebf3b2 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerManager.java
@@ -22,7 +22,6 @@
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -34,7 +33,6 @@
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DiskBalancerRunningStatus;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DiskBalancerReportProto;
-import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
@@ -84,10 +82,9 @@ public List<HddsProtos.DatanodeDiskBalancerInfoProto> getDiskBalancerReport(
for (DatanodeDetails datanodeDetails: nodeManager.getNodes(IN_SERVICE,
HddsProtos.NodeState.HEALTHY)) {
- double volumeDensitySum =
- getVolumeDataDensitySumForDatanodeDetails(datanodeDetails);
+ DiskBalancerStatus status = getStatus(datanodeDetails);
reportList.add(HddsProtos.DatanodeDiskBalancerInfoProto.newBuilder()
- .setCurrentVolumeDensitySum(volumeDensitySum)
+ .setCurrentVolumeDensitySum(status.getVolumeDataDensity())
.setNode(datanodeDetails.toProto(clientVersion))
.build());
}
@@ -267,14 +264,12 @@ private boolean shouldReturnDatanode(
private HddsProtos.DatanodeDiskBalancerInfoProto getInfoProto(
DatanodeInfo dn, int clientVersion) {
- double volumeDensitySum =
- getVolumeDataDensitySumForDatanodeDetails(dn);
DiskBalancerStatus status = getStatus(dn);
HddsProtos.DatanodeDiskBalancerInfoProto.Builder builder =
HddsProtos.DatanodeDiskBalancerInfoProto.newBuilder()
.setNode(dn.toProto(clientVersion))
- .setCurrentVolumeDensitySum(volumeDensitySum)
+ .setCurrentVolumeDensitySum(status.getVolumeDataDensity())
.setRunningStatus(status.getRunningStatus())
.setSuccessMoveCount(status.getSuccessMoveCount())
.setFailureMoveCount(status.getFailureMoveCount())
@@ -287,45 +282,16 @@ private HddsProtos.DatanodeDiskBalancerInfoProto getInfoProto(
return builder.build();
}
- /**
- * Get volume density for a specific DatanodeDetails node.
- *
- * @param datanodeDetails DatanodeDetails
- * @return DiskBalancer report.
- */
- private double getVolumeDataDensitySumForDatanodeDetails(
- DatanodeDetails datanodeDetails) {
- Preconditions.checkArgument(datanodeDetails instanceof DatanodeInfo);
-
- DatanodeInfo datanodeInfo = (DatanodeInfo) datanodeDetails;
-
- double totalCapacity = 0d, totalFree = 0d;
- for (StorageReportProto reportProto : datanodeInfo.getStorageReports()) {
- totalCapacity += reportProto.getCapacity();
- totalFree += reportProto.getRemaining();
- }
-
- Preconditions.checkArgument(totalCapacity != 0);
- double idealUsage = (totalCapacity - totalFree) / totalCapacity;
-
- double volumeDensitySum = datanodeInfo.getStorageReports().stream()
- .map(report ->
- Math.abs(((double) (report.getCapacity() - report.getRemaining())) / report.getCapacity()
- - idealUsage))
- .mapToDouble(Double::valueOf).sum();
-
- return volumeDensitySum;
- }
-
public DiskBalancerStatus getStatus(DatanodeDetails datanodeDetails) {
return statusMap.computeIfAbsent(datanodeDetails,
- dn -> new DiskBalancerStatus(DiskBalancerRunningStatus.UNKNOWN, new DiskBalancerConfiguration(), 0, 0, 0, 0));
+ dn -> new DiskBalancerStatus(DiskBalancerRunningStatus.UNKNOWN, new DiskBalancerConfiguration(),
+ 0, 0, 0, 0, Double.NaN));
}
@VisibleForTesting
public void addRunningDatanode(DatanodeDetails datanodeDetails) {
statusMap.put(datanodeDetails, new DiskBalancerStatus(DiskBalancerRunningStatus.RUNNING,
- new DiskBalancerConfiguration(), 0, 0, 0, 0));
+ new DiskBalancerConfiguration(), 0, 0, 0, 0, 0.0));
}
public void processDiskBalancerReport(DiskBalancerReportProto reportProto,
@@ -340,9 +306,12 @@ public void
processDiskBalancerReport(DiskBalancerReportProto reportProto,
long failureMoveCount = reportProto.getFailureMoveCount();
long bytesToMove = reportProto.getBytesToMove();
long balancedBytes = reportProto.getBalancedBytes();
+ double volumeDataDensity = reportProto.getVolumeDataDensity();
+
statusMap.put(dn, new DiskBalancerStatus(
isRunning ? DiskBalancerRunningStatus.RUNNING : DiskBalancerRunningStatus.STOPPED,
- diskBalancerConfiguration, successMoveCount, failureMoveCount, bytesToMove, balancedBytes));
+ diskBalancerConfiguration, successMoveCount, failureMoveCount, bytesToMove, balancedBytes,
+ volumeDataDensity));
if (reportProto.hasBalancedBytes() && balancedBytesMap != null) {
balancedBytesMap.put(dn, reportProto.getBalancedBytes());
}
@@ -353,7 +322,7 @@ public void markStatusUnknown(DatanodeDetails dn) {
if (currentStatus != null &&
currentStatus.getRunningStatus() != DiskBalancerRunningStatus.UNKNOWN) {
DiskBalancerStatus unknownStatus = new DiskBalancerStatus(DiskBalancerRunningStatus.UNKNOWN,
- new DiskBalancerConfiguration(), 0, 0, 0, 0);
+ new DiskBalancerConfiguration(), 0, 0, 0, 0, Double.NaN);
statusMap.put(dn, unknownStatus);
}
}
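
Since unreported density is now Double.NaN rather than 0, code that renders the status should check for NaN before formatting. A sketch, assuming a DiskBalancerStatus status is in scope (the "N/A" text is illustrative, not from this patch):

    // Sketch: Double.NaN means "no report received yet" for this datanode.
    String densityText = Double.isNaN(status.getVolumeDataDensity())
        ? "N/A"
        : String.format("%.4f", status.getVolumeDataDensity());
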
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerStatus.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerStatus.java
index e8df44f0b56..fa182d7a22c 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerStatus.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DiskBalancerStatus.java
@@ -35,15 +35,18 @@ public class DiskBalancerStatus {
private long failureMoveCount;
private long bytesToMove;
private long balancedBytes;
+ private double volumeDataDensity;
public DiskBalancerStatus(DiskBalancerRunningStatus isRunning, DiskBalancerConfiguration conf,
- long successMoveCount, long failureMoveCount, long bytesToMove, long balancedBytes) {
+ long successMoveCount, long failureMoveCount, long bytesToMove, long balancedBytes,
+ double volumeDataDensity) {
this.isRunning = isRunning;
this.diskBalancerConfiguration = conf;
this.successMoveCount = successMoveCount;
this.failureMoveCount = failureMoveCount;
this.bytesToMove = bytesToMove;
this.balancedBytes = balancedBytes;
+ this.volumeDataDensity = volumeDataDensity;
}
public DiskBalancerRunningStatus getRunningStatus() {
@@ -69,4 +72,8 @@ public long getBytesToMove() {
public long getBalancedBytes() {
return balancedBytes;
}
+
+ public double getVolumeDataDensity() {
+ return volumeDataDensity;
+ }
}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java
index dbdf4974145..41d1a09ffaf 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java
@@ -65,6 +65,12 @@ public void setup() throws Exception {
@Test
public void testDatanodeDiskBalancerReport() throws IOException {
+ // Populate disk balancer reports for all datanodes to avoid Double.NaN comparison issues.
+ for (DatanodeDetails dn : nodeManager.getAllNodes()) {
+ diskBalancerReportHandler.onMessage(
+ new DiskBalancerReportFromDatanode(dn, generateRandomReport()), null);
+ }
+
List<HddsProtos.DatanodeDiskBalancerInfoProto> reportProtoList =
diskBalancerManager.getDiskBalancerReport(2, ClientVersion.CURRENT_VERSION);
@@ -117,6 +123,7 @@ private DiskBalancerReportProto generateRandomReport() {
return DiskBalancerReportProto.newBuilder()
.setIsRunning(random.nextBoolean())
.setBalancedBytes(random.nextInt(10000))
+ .setVolumeDataDensity(random.nextDouble())
.setDiskBalancerConf(
HddsProtos.DiskBalancerConfigurationProto.newBuilder()
.setThreshold(random.nextInt(99))
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
index d2620140d72..aebc7318885 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
@@ -34,6 +34,7 @@
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DiskBalancerReportProto;
import org.apache.hadoop.hdds.scm.HddsTestUtils;
import org.apache.hadoop.hdds.scm.PlacementPolicy;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
@@ -42,7 +43,9 @@
import org.apache.hadoop.hdds.scm.container.placement.algorithms.SCMContainerPlacementCapacity;
import org.apache.hadoop.hdds.scm.node.DatanodeInfo;
import org.apache.hadoop.hdds.scm.node.DiskBalancerManager;
+import org.apache.hadoop.hdds.scm.node.DiskBalancerReportHandler;
import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher;
import org.apache.hadoop.ozone.ClientVersion;
import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.MiniOzoneCluster;
@@ -63,6 +66,7 @@ public class TestDiskBalancer {
private static ScmClient storageClient;
private static MiniOzoneCluster cluster;
private static DiskBalancerManager diskBalancerManager;
+ private static DiskBalancerReportHandler diskBalancerReportHandler;
@BeforeAll
public static void setup() throws Exception {
@@ -76,6 +80,8 @@ public static void setup() throws Exception {
cluster.waitForClusterToBeReady();
diskBalancerManager = cluster.getStorageContainerManager()
.getDiskBalancerManager();
+ diskBalancerReportHandler =
+ new DiskBalancerReportHandler(diskBalancerManager);
for (DatanodeDetails dn: cluster.getStorageContainerManager()
.getScmNodeManager().getAllNodes()) {
@@ -93,6 +99,13 @@ public static void cleanup() throws Exception {
@Test
public void testDatanodeDiskBalancerReport() throws IOException {
+ // Populate disk balancer reports for all datanodes to avoid Double.NaN comparison issues.
+ for (DatanodeDetails dn : cluster.getStorageContainerManager()
+ .getScmNodeManager().getAllNodes()) {
+ diskBalancerReportHandler.onMessage(
+ new SCMDatanodeHeartbeatDispatcher.DiskBalancerReportFromDatanode(dn, generateRandomReport()), null);
+ }
+
List<HddsProtos.DatanodeDiskBalancerInfoProto> reportProtoList =
storageClient.getDiskBalancerReport(2);
@@ -186,4 +199,18 @@ public void testDatanodeDiskBalancerStatus() throws IOException, InterruptedExce
ClientVersion.CURRENT_VERSION);
assertEquals(1, statusProtoList.size());
}
+
+ private DiskBalancerReportProto generateRandomReport() {
+ return DiskBalancerReportProto.newBuilder()
+ .setIsRunning(true)
+ .setBalancedBytes(1000)
+ .setVolumeDataDensity(Math.random() * 10)
+ .setDiskBalancerConf(
+ HddsProtos.DiskBalancerConfigurationProto.newBuilder()
+ .setThreshold(10)
+ .setParallelThread(2)
+ .setDiskBandwidthInMB(50)
+ .build())
+ .build();
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]