This is an automated email from the ASF dual-hosted git repository.
sammichen pushed a commit to branch HDDS-5713
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/HDDS-5713 by this push:
new fc16787c351 HDDS-14110. [DiskBalancer] Show EstimatedBytesToMove only
during active balancing and improve threshold check message (#9465)
fc16787c351 is described below
commit fc16787c351dce867a1d094e5440a8bcaadd33c9
Author: Gargi Jaiswal <[email protected]>
AuthorDate: Wed Dec 31 07:54:20 2025 +0530
HDDS-14110. [DiskBalancer] Show EstimatedBytesToMove only during active
balancing and improve threshold check message (#9465)
---
.../diskbalancer/DiskBalancerConfiguration.java | 8 ++---
.../policy/ContainerChoosingPolicy.java | 2 +-
.../policy/DiskBalancerVolumeChoosingPolicy.java | 2 +-
.../diskbalancer/TestDiskBalancerService.java | 36 ++++++++++++++++++++++
hadoop-hdds/docs/content/feature/DiskBalancer.md | 8 +++--
.../docs/content/feature/DiskBalancer.zh.md | 4 +--
.../scm/cli/datanode/DiskBalancerCommands.java | 4 +--
.../cli/datanode/DiskBalancerStartSubcommand.java | 2 +-
.../cli/datanode/DiskBalancerStatusSubcommand.java | 9 ++++--
.../cli/datanode/DiskBalancerUpdateSubcommand.java | 2 +-
10 files changed, 61 insertions(+), 16 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerConfiguration.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerConfiguration.java
index 711cc7b972d..c0ee728b509 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerConfiguration.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerConfiguration.java
@@ -44,7 +44,7 @@ public final class DiskBalancerConfiguration {
"back to use metadata directory instead.")
private String infoDir;
- @Config(key = "volume.density.threshold", type = ConfigType.DOUBLE,
+ @Config(key = "volume.density.threshold.percent", type = ConfigType.DOUBLE,
defaultValue = "10", tags = {ConfigTag.DISKBALANCER},
description = "Threshold is a percentage in the range of 0 to 100. A " +
"datanode is considered balanced if for each volume, the " +
@@ -197,12 +197,12 @@ public double getThresholdAsRatio() {
/**
* Sets the threshold value for Disk Balancer.
*
- * @param threshold a percentage value in the range 0 to 100
+ * @param threshold a percentage value in the range (0 to 100) both exclusive
*/
public void setThreshold(double threshold) {
- if (threshold < 0d || threshold >= 100d) {
+ if (threshold <= 0d || threshold >= 100d) {
throw new IllegalArgumentException(
- "Threshold must be a percentage(double) in the range 0 to 100.");
+ "Threshold must be a percentage(double) in the range 0 to 100 both exclusive.");
}
this.threshold = threshold;
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/ContainerChoosingPolicy.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/ContainerChoosingPolicy.java
index c6bcc1ff50f..d512de84bdf 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/ContainerChoosingPolicy.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/ContainerChoosingPolicy.java
@@ -36,7 +36,7 @@ public interface ContainerChoosingPolicy {
* @param destVolume the destination volume to which container is being
moved.
* @param inProgressContainerIDs containerIDs present in this set should be
- avoided as these containers are already under move by diskBalancer.
- * @param thresholdPercentage the threshold percentage in range [0, 100]
+ * @param thresholdPercentage the threshold percentage in range (0, 100)
* @param volumeSet the volumeSet instance
* @param deltaMap the deltaMap instance of source volume
* @return a Container
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DiskBalancerVolumeChoosingPolicy.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DiskBalancerVolumeChoosingPolicy.java
index 043aa83f5f9..c64733fc37e 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DiskBalancerVolumeChoosingPolicy.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DiskBalancerVolumeChoosingPolicy.java
@@ -30,7 +30,7 @@ public interface DiskBalancerVolumeChoosingPolicy {
* Choose a pair of volumes for balancing.
*
* @param volumeSet - volumes to choose from.
- * @param thresholdPercentage the threshold percentage in range [0, 100] to
choose the source volume.
+ * @param thresholdPercentage the threshold percentage in range (0, 100) to
choose the source volume.
* @param deltaSizes - the sizes changes of inProgress balancing jobs.
* @param containerSize - the estimated size of container to be moved.
* @return Source volume and Dest volume.
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
index ca5dc710856..012cd3742eb 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerService.java
@@ -21,6 +21,7 @@
import static
org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerVolumeCalculation.getVolumeUsages;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyDouble;
@@ -325,4 +326,39 @@ private void
setLayoutAndSchemaForTest(ContainerTestVersionInfo versionInfo) {
String schemaVersion = versionInfo.getSchemaVersion();
ContainerTestVersionInfo.setTestSchemaVersion(schemaVersion, conf);
}
+
+ public static Stream<Arguments> thresholdValidationTestCases() {
+ return Stream.of(
+ // Invalid values that should throw IllegalArgumentException
+ Arguments.arguments(0.0, true, null),
+ Arguments.arguments(100.0, true, null),
+ Arguments.arguments(-1.0, true, null),
+ Arguments.arguments(-0.001, true, null),
+ Arguments.arguments(100.001, true, null),
+ // Valid boundary values that should be accepted
+ Arguments.arguments(0.001, false, 0.001),
+ Arguments.arguments(99.999, false, 99.999),
+ // Valid middle values that should be accepted
+ Arguments.arguments(50.5, false, 50.5),
+ Arguments.arguments(99.0, false, 99.0)
+ );
+ }
+
+ @ParameterizedTest
+ @MethodSource("thresholdValidationTestCases")
+ public void testDiskBalancerConfigurationThresholdValidation(double threshold,
+ boolean shouldThrowException, Double expectedThreshold) {
+ DiskBalancerConfiguration config = new DiskBalancerConfiguration();
+
+ if (shouldThrowException) {
+ IllegalArgumentException exception = assertThrows(IllegalArgumentException.class,
+ () -> config.setThreshold(threshold));
+ assertEquals("Threshold must be a percentage(double) in the range 0 to 100 both exclusive.",
+ exception.getMessage());
+ } else {
+ // Valid threshold should be accepted
+ config.setThreshold(threshold);
+ assertEquals(expectedThreshold, config.getThreshold(), 0.0001);
+ }
+ }
}
diff --git a/hadoop-hdds/docs/content/feature/DiskBalancer.md
b/hadoop-hdds/docs/content/feature/DiskBalancer.md
index 8c708795ebb..3339d3b49a7 100644
--- a/hadoop-hdds/docs/content/feature/DiskBalancer.md
+++ b/hadoop-hdds/docs/content/feature/DiskBalancer.md
@@ -156,7 +156,7 @@ ozone admin datanode diskbalancer report
[<datanode-address> ...] [--in-service-
| `<datanode-address>` | One or more datanode addresses as
positional arguments. Addresses can be:<br>- Hostname (e.g., `DN-1`) - uses
default CLIENT_RPC port (19864)<br>- Hostname with port (e.g.,
`DN-1:19864`)<br>- IP address (e.g., `192.168.1.10`)<br>- IP address with port
(e.g., `192.168.1.10:19864`)<br>- Stdin (`-`) - reads datanode addresses from
standard input, one per line | `DN-1`<br>`DN-1:19864`<br>`192.168.1.10`<br>`-` |
| `--in-service-datanodes` | It queries SCM for all IN_SERVICE
datanodes and executes the command on all of them.
| `--in-service-datanodes` |
| `--json` | Format output as JSON.
|
`--json` |
-| `-t/--threshold` | Volume density threshold percentage
(default: 10.0). Used with `start` and `update` commands.
| `-t 5`<br>`--threshold 5.0` |
+| `-t/--threshold-percentage` | Volume density threshold percentage
(default: 10.0). Used with `start` and `update` commands.
| `-t 5`<br>`--threshold-percentage 5.0` |
| `-b/--bandwidth-in-mb` | Maximum disk bandwidth in MB/s
(default: 10). Used with `start` and `update` commands.
| `-b 20`<br>`--bandwidth-in-mb 50` |
| `-p/--parallel-thread` | Number of parallel threads (default:
1). Used with `start` and `update` commands.
| `-p 5`<br>`--parallel-thread 10` |
| `-s/--stop-after-disk-even` | Stop automatically after disks are
balanced (default: false). Used with `start` and `update` commands.
| `-s false`<br>`--stop-after-disk-even true` |
@@ -173,6 +173,8 @@ ozone admin datanode diskbalancer start
--in-service-datanodes
# Start DiskBalancer with configuration parameters
ozone admin datanode diskbalancer start DN-1 -t 5 -b 20 -p 5
+# Or using the long form:
+ozone admin datanode diskbalancer start DN-1 --threshold-percentage 5 -b 20 -p
5
# Read datanode addresses from stdin
echo -e "DN-1\nDN-2" | ozone admin datanode diskbalancer start -
@@ -201,6 +203,8 @@ ozone admin datanode diskbalancer update DN-1 -t 5 -b 50 -p
10
# Update on all IN_SERVICE datanodes
ozone admin datanode diskbalancer update --in-service-datanodes -t 5
+# Or using the long form:
+ozone admin datanode diskbalancer update --in-service-datanodes
--threshold-percentage 5
# Update with json output
ozone admin datanode diskbalancer update DN-1 -b 50 --json
@@ -237,7 +241,7 @@ The DiskBalancer's behavior can be controlled using the
following configuration
| Property | Default Value
|
Description
|
|-------------------------------------------------------------|----------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `hdds.datanode.disk.balancer.enabled` | `false`
| If
false, the DiskBalancer service on the Datanode is disabled. Configure it to
true for diskBalancer to be enabled.
|
-| `hdds.datanode.disk.balancer.volume.density.threshold` | `10.0`
| A
percentage (0-100). A datanode is considered balanced if for each volume, its
utilization differs from the average datanode utilization by no more than this
threshold. |
+| `hdds.datanode.disk.balancer.volume.density.threshold.percent` | `10.0`
| A
percentage (0-100). A datanode is considered balanced if for each volume, its
utilization differs from the average datanode utilization by no more than this
threshold. |
| `hdds.datanode.disk.balancer.max.disk.throughputInMBPerSec` | `10`
| The
maximum bandwidth (in MB/s) that the balancer can use for moving data, to avoid
impacting client I/O.
|
| `hdds.datanode.disk.balancer.parallel.thread` | `5`
| The
number of worker threads to use for moving containers in parallel.
|
| `hdds.datanode.disk.balancer.service.interval` | `60s`
| The
time interval at which the Datanode DiskBalancer service checks for imbalance
and updates its configuration.
|
diff --git a/hadoop-hdds/docs/content/feature/DiskBalancer.zh.md
b/hadoop-hdds/docs/content/feature/DiskBalancer.zh.md
index 5af4046066a..65ba7ca3fa1 100644
--- a/hadoop-hdds/docs/content/feature/DiskBalancer.zh.md
+++ b/hadoop-hdds/docs/content/feature/DiskBalancer.zh.md
@@ -151,7 +151,7 @@ ozone admin datanode diskbalancer report
[<datanode-address> ...] [--in-service-
| `<datanode-address>` | 一个或多个数据节点地址作为位置参数。地址可以是:<br>-
主机名(例如,`DN-1`)- 使用默认的 CLIENT_RPC 端口 (19864)<br>- 带端口的主机名(例如,`DN-1:19864`)<br>-
IP 地址(例如,`192.168.1.10`)<br>- 带端口的 IP 地址(例如,`192.168.1.10:19864`)<br>- 标准输入
(`-`) - 从标准输入读取数据节点地址,每行一个 | `DN-1`<br>`DN-1:19864`<br>`192.168.1.10`<br>`-` |
| `--in-service-datanodes` | 它向 SCM 查询所有 IN_SERVICE
数据节点,并在所有这些数据节点上执行该命令。
| `--in-service-datanodes` |
| `--json` | 输出格式设置为JSON。
| `--json` |
-| `-t/--threshold` | 磁盘使用率阈值百分比(默认值:10.0)。与 `start` 和
`update` 命令一起使用。
| `-t 5`<br>`--threshold 5.0` |
+| `-t/--threshold-percentage` | 磁盘使用率阈值百分比(默认值:10.0)。与 `start` 和
`update` 命令一起使用。
| `-t 5`<br>`--threshold-percentage 5.0` |
| `-b/--bandwidth-in-mb` | 最大磁盘带宽,单位为 MB/s(默认值:10)。与 `start` 和
`update` 命令一起使用。
| `-b 20`<br>`--bandwidth-in-mb 50` |
| `-p/--parallel-thread` | 并行线程数(默认值:1)。与 `start` 和 `update`
命令一起使用。
| `-p 5`<br>`--parallel-thread 10` |
| `-s/--stop-after-disk-even` | 磁盘平衡完成后自动停止(默认值:false)。与 `start` 和
`update` 命令一起使用。
| `-s false`<br>`--stop-after-disk-even true` |
@@ -233,7 +233,7 @@ The DiskBalancer's behavior can be controlled using the
following configuration
| Property | Default Value
| Description
|
|-------------------------------------------------------------|----------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `hdds.datanode.disk.balancer.enabled` | `false`
| 如果为 false,则 Datanode 上的 DiskBalancer 服务将被禁用。将其配置为
true 可启用 DiskBalancer。 |
|
|
[...]
-| `hdds.datanode.disk.balancer.volume.density.threshold` | `10.0`
|
百分比(0-100)。如果对于每个卷,其利用率与平均数据节点利用率之差不超过此阈值,则认为数据节点处于平衡状态。 |
+| `hdds.datanode.disk.balancer.volume.density.threshold.percent` | `10.0`
|
百分比(0-100)。如果对于每个卷,其利用率与平均数据节点利用率之差不超过此阈值,则认为数据节点处于平衡状态。 |
| `hdds.datanode.disk.balancer.max.disk.throughputInMBPerSec` | `10`
| 平衡器可用于移动数据的最大带宽(以 MB/s 为单位),以避免影响客户端 I/O。
|
| `hdds.datanode.disk.balancer.parallel.thread` | `5`
| 用于并行移动容器的工作线程数。
|
| `hdds.datanode.disk.balancer.service.interval` | `60s`
| Datanode DiskBalancer 服务检查不平衡并更新其配置的时间间隔。
|
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerCommands.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerCommands.java
index 4c972bad4e8..c912ad73550 100644
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerCommands.java
+++
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerCommands.java
@@ -39,7 +39,7 @@
*
* To start:
* ozone admin datanode diskbalancer start {@literal <host[:port]>}
[{@literal <host[:port]>} ...]
- * [ -t/--threshold {@literal <threshold>}]
+ * [ -t/--threshold-percentage {@literal <threshold>}]
* [ -b/--bandwidth-in-mb {@literal <bandwidthInMB>}]
* [ -p/--parallel-thread {@literal <parallelThread>}]
* [ -s/--stop-after-disk-even {@literal <stopAfterDiskEven>}]
@@ -97,7 +97,7 @@
*
* To update:
* ozone admin datanode diskbalancer update {@literal <host[:port]>}
[{@literal <host[:port]>} ...]
- * [ -t/--threshold {@literal <threshold>}]
+ * [ -t/--threshold-percentage {@literal <threshold>}]
* [ -b/--bandwidth-in-mb {@literal <bandwidthInMB>}]
* [ -p/--parallel-thread {@literal <parallelThread>}]
* [ -s/--stop-after-disk-even {@literal <stopAfterDiskEven>}]
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java
index 364065f99d0..8aefc0dca9f 100644
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java
+++
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStartSubcommand.java
@@ -37,7 +37,7 @@
versionProvider = HddsVersionProvider.class)
public class DiskBalancerStartSubcommand extends
AbstractDiskBalancerSubCommand {
- @Option(names = {"-t", "--threshold"},
+ @Option(names = {"-t", "--threshold-percentage"},
description = "Percentage deviation from average utilization of " +
"the disks after which a datanode will be rebalanced (for " +
"example, '10' for 10%%).")
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
index 2c097da4d75..34d4d1e1bc7 100644
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
+++
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerStatusSubcommand.java
@@ -124,8 +124,13 @@ private String
generateStatus(List<DatanodeDiskBalancerInfoProto> protos) {
contentList.add(estimatedTimeLeft >= 0 ?
String.valueOf(estimatedTimeLeft) : "N/A");
}
- formatBuilder.append("%nNote: Estimated time left is calculated" +
- " based on the estimated bytes to move and the configured disk bandwidth.");
+ formatBuilder.append("%nNote:%n");
+ formatBuilder.append(" - EstBytesToMove is calculated based on the target disk even state" +
+ " with the configured threshold.%n");
+ formatBuilder.append(" - EstTimeLeft is calculated based on EstimatedBytesToMove and configured" +
+ " disk bandwidth.%n");
+ formatBuilder.append(" - Both EstimatedBytes and EstTimeLeft could be non-zero while no containers" +
+ " can be moved, especially when the configured threshold or disk capacity is too small.");
return String.format(formatBuilder.toString(),
contentList.toArray(new String[0]));
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java
index 4ba554df82e..825a81c8aca 100644
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java
+++
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DiskBalancerUpdateSubcommand.java
@@ -37,7 +37,7 @@
versionProvider = HddsVersionProvider.class)
public class DiskBalancerUpdateSubcommand extends
AbstractDiskBalancerSubCommand {
- @Option(names = {"-t", "--threshold"},
+ @Option(names = {"-t", "--threshold-percentage"},
description = "Percentage deviation from average utilization of " +
"the disks after which a datanode will be rebalanced (for " +
"example, '10' for 10%%).")
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org