This is an automated email from the ASF dual-hosted git repository.

sammichen pushed a commit to branch HDDS-5713
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/HDDS-5713 by this push:
     new 3623f030eff HDDS-13497. [DiskBalancer] Add new property "hdds.datanode.disk.balancer.enabled" (#8869)
3623f030eff is described below

commit 3623f030effbdc813f5ca37fe39da7e7fc53cdd3
Author: Gargi Jaiswal <[email protected]>
AuthorDate: Mon Aug 4 13:52:18 2025 +0530

    HDDS-13497. [DiskBalancer] Add new property "hdds.datanode.disk.balancer.enabled" (#8869)
---
 .../org/apache/hadoop/hdds/HddsConfigKeys.java     |  4 +++
 .../common/src/main/resources/ozone-default.xml    |  9 +++++
 .../common/statemachine/DatanodeStateMachine.java  | 24 +++++++++----
 .../states/endpoint/HeartbeatEndpointTask.java     |  4 ++-
 .../ozone/container/ozoneimpl/OzoneContainer.java  | 41 ++++++++++++++++------
 hadoop-hdds/docs/content/design/diskbalancer.md    | 11 ++++++
 hadoop-hdds/docs/content/feature/DiskBalancer.md   | 23 ++++++------
 .../docs/content/feature/DiskBalancer.zh.md        | 23 ++++++------
 .../hdds/scm/server/SCMClientProtocolServer.java   | 17 +++++++++
 .../hdds/scm/server/StorageContainerManager.java   | 21 +++++++----
 .../hdds/scm/node/TestDiskBalancerManager.java     |  1 +
 .../src/main/compose/ozonesecure/diskbalancer.yaml | 26 ++++++++++++++
 .../main/compose/ozonesecure/test-diskbalancer.sh  | 33 +++++++++++++++++
 .../testdiskbalancer.robot                         |  0
 .../hadoop/ozone/scm/node/TestDiskBalancer.java    |  4 ++-
 ...skBalancerDuringDecommissionAndMaintenance.java |  2 ++
 16 files changed, 195 insertions(+), 48 deletions(-)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
index 488dbd9eb19..d9db5ade573 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
@@ -399,6 +399,10 @@ public final class HddsConfigKeys {
   public static final String OZONE_DATANODE_IO_METRICS_PERCENTILES_INTERVALS_SECONDS_KEY =
       "ozone.volume.io.percentiles.intervals.seconds";
 
+  public static final String HDDS_DATANODE_DISK_BALANCER_ENABLED_KEY =
+      "hdds.datanode.disk.balancer.enabled";
+  public static final boolean HDDS_DATANODE_DISK_BALANCER_ENABLED_DEFAULT = false;
+
   public static final String HDDS_DATANODE_DNS_INTERFACE_KEY =
       "hdds.datanode.dns.interface";
   public static final String HDDS_DATANODE_DNS_NAMESERVER_KEY =
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 45c07cb2bbe..9e3f5fa8b32 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -208,6 +208,15 @@
       The value should be between 0-1. Such as 0.1 which means 10% of volume space will be reserved.
     </description>
   </property>
+  <property>
+    <name>hdds.datanode.disk.balancer.enabled</name>
+    <value>false</value>
+    <tag>OZONE, DATANODE, DISKBALANCER</tag>
+    <description>If this property is set to true, then the Disk Balancer
+      feature is enabled on SCM and Datanodes, and users can use
+      this service. By default, this is disabled.
+    </description>
+  </property>
   <property>
     <name>hdds.datanode.volume.choosing.policy</name>
     <value>org.apache.hadoop.ozone.container.common.volume.CapacityVolumeChoosingPolicy</value>
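
For context, a minimal sketch (not part of this commit; hypothetical class name) of how the new flag is consumed: the same conf.getBoolean lookup that the patch adds to OzoneContainer and StorageContainerManager below, with setBoolean standing in for an ozone-site.xml entry the way the updated tests do.

    import org.apache.hadoop.hdds.HddsConfigKeys;
    import org.apache.hadoop.hdds.conf.OzoneConfiguration;

    public class DiskBalancerFlagSketch {
      public static void main(String[] args) {
        OzoneConfiguration conf = new OzoneConfiguration();
        // Stand-in for setting hdds.datanode.disk.balancer.enabled=true in ozone-site.xml.
        conf.setBoolean(HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_KEY, true);
        // Same key/default pair the patch reads on both the Datanode and SCM side.
        boolean enabled = conf.getBoolean(
            HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_KEY,
            HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_DEFAULT);
        System.out.println("Disk Balancer enabled: " + enabled);
      }
    }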
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
index 861ff5579c0..0423dcb5db5 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
@@ -245,7 +245,7 @@ public DatanodeStateMachine(HddsDatanodeService hddsDatanodeService,
 
     // When we add new handlers just adding a new handler here should do the
     // trick.
-    commandDispatcher = CommandDispatcher.newBuilder()
+    CommandDispatcher.Builder dispatcherBuilder = CommandDispatcher.newBuilder()
         .addHandler(new CloseContainerCommandHandler(
             dnConf.getContainerCloseThreads(),
             dnConf.getCommandQueueLimit(), threadNamePrefix))
@@ -261,16 +261,26 @@ public DatanodeStateMachine(HddsDatanodeService hddsDatanodeService,
             pipelineCommandExecutorService))
         .addHandler(new CreatePipelineCommandHandler(conf,
             pipelineCommandExecutorService))
-        .addHandler(new SetNodeOperationalStateCommandHandler(conf,
-            supervisor::nodeStateUpdated, container.getDiskBalancerService()::nodeStateUpdated))
         .addHandler(new FinalizeNewLayoutVersionCommandHandler())
         .addHandler(new RefreshVolumeUsageCommandHandler())
-        .addHandler(new ReconcileContainerCommandHandler(supervisor, dnClient))
-        .addHandler(new DiskBalancerCommandHandler())
+        .addHandler(new ReconcileContainerCommandHandler(supervisor, dnClient));
+
+    if (container.getDiskBalancerService() != null) {
+      dispatcherBuilder.addHandler(new SetNodeOperationalStateCommandHandler(
+          conf, supervisor::nodeStateUpdated,
+          container.getDiskBalancerService()::nodeStateUpdated));
+      dispatcherBuilder.addHandler(new DiskBalancerCommandHandler());
+    } else {
+      dispatcherBuilder.addHandler(new SetNodeOperationalStateCommandHandler(
+          conf, supervisor::nodeStateUpdated, null));
+    }
+
+    dispatcherBuilder
         .setConnectionManager(connectionManager)
         .setContainer(container)
-        .setContext(context)
-        .build();
+        .setContext(context);
+
+    commandDispatcher = dispatcherBuilder.build();
 
     reportManager = ReportManager.newBuilder(conf)
         .setStateContext(context)
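
The refactor above only materializes the fluent chain into a builder variable so the disk-balancer handlers can be registered conditionally. A self-contained sketch of that pattern, using hypothetical Dispatcher/Handler types in place of CommandDispatcher and its handlers:

    import java.util.ArrayList;
    import java.util.List;

    public class ConditionalBuilderSketch {
      interface Handler { String name(); }

      static final class Dispatcher {
        private final List<Handler> handlers;
        private Dispatcher(List<Handler> handlers) { this.handlers = handlers; }
        static Builder newBuilder() { return new Builder(); }
        List<Handler> handlers() { return handlers; }

        static final class Builder {
          private final List<Handler> list = new ArrayList<>();
          Builder addHandler(Handler h) { list.add(h); return this; }
          Dispatcher build() { return new Dispatcher(list); }
        }
      }

      public static void main(String[] args) {
        Object optionalService = null;  // plays the role of a disabled DiskBalancerService
        // Keep the builder in a variable instead of one long chain ...
        Dispatcher.Builder builder = Dispatcher.newBuilder()
            .addHandler(() -> "alwaysRegistered");
        // ... so optional handlers are added only when their dependency exists.
        if (optionalService != null) {
          builder.addHandler(() -> "optionalHandler");
        }
        Dispatcher dispatcher = builder.build();
        System.out.println(dispatcher.handlers().size() + " handler(s) registered");
      }
    }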
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java
index 44714c824c7..1309be796d1 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java
@@ -259,7 +259,9 @@ private void addQueuedCommandCounts(
 
   private void addDiskBalancerReport(SCMHeartbeatRequestProto.Builder requestBuilder) {
     DiskBalancerInfo info = context.getParent().getContainer().getDiskBalancerInfo();
-    requestBuilder.setDiskBalancerReport(info.toDiskBalancerReportProto());
+    if (info != null) {
+      requestBuilder.setDiskBalancerReport(info.toDiskBalancerReportProto());
+    }
   }
 
   /**
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
index 5661ddddb77..9000be01ea3 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
@@ -17,6 +17,8 @@
 
 package org.apache.hadoop.ozone.container.ozoneimpl;
 
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_DEFAULT;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_KEY;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_TIMEOUT;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_WORKERS;
@@ -136,7 +138,7 @@ public class OzoneContainer {
   private final StaleRecoveringContainerScrubbingService
       recoveringContainerScrubbingService;
   private final GrpcTlsConfig tlsClientConfig;
-  private final DiskBalancerService diskBalancerService;
+  private DiskBalancerService diskBalancerService;
   private final AtomicReference<InitializingStatus> initializingStatus;
   private final ReplicationServer replicationServer;
   private DatanodeDetails datanodeDetails;
@@ -278,14 +280,20 @@ public OzoneContainer(HddsDatanodeService hddsDatanodeService,
             checksumTreeManager,
             context.getParent().getReconfigurationHandler());
 
-    Duration diskBalancerSvcInterval = conf.getObject(
-        DiskBalancerConfiguration.class).getDiskBalancerInterval();
-    Duration diskBalancerSvcTimeout = conf.getObject(
-        DiskBalancerConfiguration.class).getDiskBalancerTimeout();
-    diskBalancerService =
-        new DiskBalancerService(this, diskBalancerSvcInterval.toMillis(),
-            diskBalancerSvcTimeout.toMillis(), TimeUnit.MILLISECONDS, 1,
-            config);
+    if (conf.getBoolean(HDDS_DATANODE_DISK_BALANCER_ENABLED_KEY,
+        HDDS_DATANODE_DISK_BALANCER_ENABLED_DEFAULT)) {
+      Duration diskBalancerSvcInterval = conf.getObject(
+          DiskBalancerConfiguration.class).getDiskBalancerInterval();
+      Duration diskBalancerSvcTimeout = conf.getObject(
+          DiskBalancerConfiguration.class).getDiskBalancerTimeout();
+      diskBalancerService =
+          new DiskBalancerService(this, diskBalancerSvcInterval.toMillis(),
+              diskBalancerSvcTimeout.toMillis(), TimeUnit.MILLISECONDS, 1,
+              config);
+    } else {
+      diskBalancerService = null;
+      LOG.info("Disk Balancer is disabled.");
+    }
 
     Duration recoveringContainerScrubbingSvcInterval =
         dnConf.getRecoveringContainerScrubInterval();
@@ -542,7 +550,10 @@ public void start(String clusterId) throws IOException {
     writeChannel.start();
     readChannel.start();
     blockDeletingService.start();
-    diskBalancerService.start();
+
+    if (diskBalancerService != null) {
+      diskBalancerService.start();
+    }
     recoveringContainerScrubbingService.start();
 
     initHddsVolumeContainer();
@@ -573,7 +584,9 @@ public void stop() {
       dbCompactionExecutorService.shutdown();
     }
     blockDeletingService.shutdown();
-    diskBalancerService.shutdown();
+    if (diskBalancerService != null) {
+      diskBalancerService.shutdown();
+    }
     recoveringContainerScrubbingService.shutdown();
     IOUtils.closeQuietly(metrics);
     ContainerMetrics.remove();
@@ -703,10 +716,16 @@ public void compactDb() {
   }
 
   public DiskBalancerReportProto getDiskBalancerReport() {
+    if (diskBalancerService == null) {
+      return null;
+    }
     return diskBalancerService.getDiskBalancerReportProto();
   }
 
   public DiskBalancerInfo getDiskBalancerInfo() {
+    if (diskBalancerService == null) {
+      return null;
+    }
     return diskBalancerService.getDiskBalancerInfo();
   }
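
With the service now nullable, every lifecycle and report call in OzoneContainer is guarded, and callers such as HeartbeatEndpointTask must tolerate a missing report (hence the null check added there). A compact, self-contained sketch of that guard pattern, with hypothetical types rather than the real Ozone classes:

    import java.util.Optional;

    public class OptionalServiceSketch {
      interface BackgroundService {
        void start();
        void shutdown();
        String report();
      }

      private final BackgroundService service;

      OptionalServiceSketch(boolean enabled) {
        // Construct the service only when the feature flag is on; otherwise keep null.
        this.service = enabled ? new BackgroundService() {
          @Override public void start() { System.out.println("service started"); }
          @Override public void shutdown() { System.out.println("service stopped"); }
          @Override public String report() { return "report"; }
        } : null;
      }

      void start() {
        if (service != null) {
          service.start();
        }
      }

      void stop() {
        if (service != null) {
          service.shutdown();
        }
      }

      // Callers get an empty report instead of an NPE when the feature is disabled.
      Optional<String> report() {
        return service == null ? Optional.empty() : Optional.of(service.report());
      }

      public static void main(String[] args) {
        OptionalServiceSketch disabled = new OptionalServiceSketch(false);
        disabled.start();                       // no-op
        System.out.println(disabled.report());  // Optional.empty
        disabled.stop();                        // no-op
      }
    }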
 
diff --git a/hadoop-hdds/docs/content/design/diskbalancer.md b/hadoop-hdds/docs/content/design/diskbalancer.md
index 5121631c037..08326223482 100644
--- a/hadoop-hdds/docs/content/design/diskbalancer.md
+++ b/hadoop-hdds/docs/content/design/diskbalancer.md
@@ -118,6 +118,17 @@ and is not already being moved by another balancing operation. To optimize perfo
 containers repeatedly, it caches the list of containers for each volume which auto expires after one hour of its last 
 used time or if the container iterator for that is invalidated on full utilisation.
 
+## Feature Flag
+
+The Disk Balancer feature is introduced with a feature flag. By default, this feature is disabled to prevent it from 
+running until it has undergone thorough testing.
+
+The feature can be enabled by setting the following property to `true` in the `ozone-site.xml` configuration file:
+`hdds.datanode.disk.balancer.enabled = true`
+
+Developers who wish to test or use the Disk Balancer must explicitly enable it. Once the feature is 
+considered stable, the default value may be changed to `true` in a future release.
+
 ## DiskBalancer Metrics
 
 The DiskBalancer service exposes JMX metrics on each Datanode for real-time monitoring. These metrics provide insights
diff --git a/hadoop-hdds/docs/content/feature/DiskBalancer.md b/hadoop-hdds/docs/content/feature/DiskBalancer.md
index 6ef86ec9654..311b19239e0 100644
--- a/hadoop-hdds/docs/content/feature/DiskBalancer.md
+++ b/hadoop-hdds/docs/content/feature/DiskBalancer.md
@@ -108,15 +108,16 @@ ozone admin datanode diskbalancer report --count <N>
 
 The DiskBalancer's behavior can be controlled using the following configuration properties in `ozone-site.xml`.
 
-| Property | Default Value | Description |
-| ------------------------------------------------------------ |------------------|-------------|
-| `hdds.datanode.disk.balancer.volume.density.threshold` | `10.0` | A percentage (0-100). A datanode is considered balanced if for each volume, its utilization differs from the average datanode utilization by no more than this threshold. |
-| `hdds.datanode.disk.balancer.max.disk.throughputInMBPerSec` | `10` | The maximum bandwidth (in MB/s) that the balancer can use for moving data, to avoid impacting client I/O. |
-| `hdds.datanode.disk.balancer.parallel.thread` | `5` | The number of worker threads to use for moving containers in parallel. |
-| `hdds.datanode.disk.balancer.service.interval` | `60s` | The time interval at which the Datanode DiskBalancer service checks for imbalance and updates its configuration. |
-| `hdds.datanode.disk.balancer.stop.after.disk.even` | `true` | If true, the DiskBalancer will automatically stop its balancing activity once disks are considered balanced (i.e., all volume densities are within the threshold). |
-| `hdds.datanode.disk.balancer.volume.choosing.policy` | `org.apache.hadoop.ozone.container.diskbalancer.policy.DefaultVolumeChoosingPolicy` | The policy class for selecting source and destination volumes for balancing. |
-| `hdds.datanode.disk.balancer.container.choosing.policy` | `org.apache.hadoop.ozone.container.diskbalancer.policy.DefaultContainerChoosingPolicy` | The policy class for selecting which containers to move from a source volume to destination volume. |
-| `hdds.datanode.disk.balancer.service.timeout` | `300s` | Timeout for the Datanode DiskBalancer service operations. |
-| `hdds.datanode.disk.balancer.should.run.default` | `false` | If the balancer fails to read its persisted configuration, this value determines if the service should run by default. |
+| Property | Default Value | Description |
+|------------------------------------------------------------|------------------|-------------|
+| `hdds.datanode.disk.balancer.enabled` | `false` | If false, the DiskBalancer service on the Datanode is disabled. Set it to true to enable the DiskBalancer. |
+| `hdds.datanode.disk.balancer.volume.density.threshold` | `10.0` | A percentage (0-100). A datanode is considered balanced if for each volume, its utilization differs from the average datanode utilization by no more than this threshold. |
+| `hdds.datanode.disk.balancer.max.disk.throughputInMBPerSec` | `10` | The maximum bandwidth (in MB/s) that the balancer can use for moving data, to avoid impacting client I/O. |
+| `hdds.datanode.disk.balancer.parallel.thread` | `5` | The number of worker threads to use for moving containers in parallel. |
+| `hdds.datanode.disk.balancer.service.interval` | `60s` | The time interval at which the Datanode DiskBalancer service checks for imbalance and updates its configuration. |
+| `hdds.datanode.disk.balancer.stop.after.disk.even` | `true` | If true, the DiskBalancer will automatically stop its balancing activity once disks are considered balanced (i.e., all volume densities are within the threshold). |
+| `hdds.datanode.disk.balancer.volume.choosing.policy` | `org.apache.hadoop.ozone.container.diskbalancer.policy.DefaultVolumeChoosingPolicy` | The policy class for selecting source and destination volumes for balancing. |
+| `hdds.datanode.disk.balancer.container.choosing.policy` | `org.apache.hadoop.ozone.container.diskbalancer.policy.DefaultContainerChoosingPolicy` | The policy class for selecting which containers to move from a source volume to destination volume. |
+| `hdds.datanode.disk.balancer.service.timeout` | `300s` | Timeout for the Datanode DiskBalancer service operations. |
+| `hdds.datanode.disk.balancer.should.run.default` | `false` | If the balancer fails to read its persisted configuration, this value determines if the service should run by default. |
 
diff --git a/hadoop-hdds/docs/content/feature/DiskBalancer.zh.md b/hadoop-hdds/docs/content/feature/DiskBalancer.zh.md
index a44fb07cd7c..d417b2c1fc3 100644
--- a/hadoop-hdds/docs/content/feature/DiskBalancer.zh.md
+++ b/hadoop-hdds/docs/content/feature/DiskBalancer.zh.md
@@ -102,15 +102,16 @@ ozone admin datanode diskbalancer report --count <N>
 
 The DiskBalancer's behavior can be controlled using the following configuration properties in `ozone-site.xml`.
 
-| Property | Default Value | Description |
-| ------------------------------------------------------------ |------------------|-------------|
-| `hdds.datanode.disk.balancer.volume.density.threshold` | `10.0` | 百分比(0-100)。如果对于每个卷,其利用率与平均数据节点利用率之差不超过此阈值,则认为数据节点处于平衡状态。 |
-| `hdds.datanode.disk.balancer.max.disk.throughputInMBPerSec` | `10` | 平衡器可用于移动数据的最大带宽(以 MB/s 为单位),以避免影响客户端 I/O。 |
-| `hdds.datanode.disk.balancer.parallel.thread` | `5` | 用于并行移动容器的工作线程数。 |
-| `hdds.datanode.disk.balancer.service.interval` | `60s` | Datanode DiskBalancer 服务检查不平衡并更新其配置的时间间隔。 |
-| `hdds.datanode.disk.balancer.stop.after.disk.even` | `true` | 如果为真,则一旦磁盘被视为平衡(即所有卷密度都在阈值内),DiskBalancer 将自动停止其平衡活动。 |
-| `hdds.datanode.disk.balancer.volume.choosing.policy` | `org.apache.hadoop.ozone.container.diskbalancer.policy.DefaultVolumeChoosingPolicy` | 用于选择平衡的源卷和目标卷的策略类。 |
-| `hdds.datanode.disk.balancer.container.choosing.policy` | `org.apache.hadoop.ozone.container.diskbalancer.policy.DefaultContainerChoosingPolicy` | 用于选择将哪些容器从源卷移动到目标卷的策略类。 |
-| `hdds.datanode.disk.balancer.service.timeout` | `300s` | Datanode DiskBalancer 服务操作超时。 |
-| `hdds.datanode.disk.balancer.should.run.default` | `false` | 如果平衡器无法读取其持久配置,则该值决定服务是否应默认运行。 |
+| Property | Default Value | Description |
+|------------------------------------------------------------|------------------|-------------|
+| `hdds.datanode.disk.balancer.enabled` | `false` | 如果为 false,则 Datanode 上的 DiskBalancer 服务将被禁用。将其配置为 true 可启用 DiskBalancer。 |
+| `hdds.datanode.disk.balancer.volume.density.threshold` | `10.0` | 百分比(0-100)。如果对于每个卷,其利用率与平均数据节点利用率之差不超过此阈值,则认为数据节点处于平衡状态。 |
+| `hdds.datanode.disk.balancer.max.disk.throughputInMBPerSec` | `10` | 平衡器可用于移动数据的最大带宽(以 MB/s 为单位),以避免影响客户端 I/O。 |
+| `hdds.datanode.disk.balancer.parallel.thread` | `5` | 用于并行移动容器的工作线程数。 |
+| `hdds.datanode.disk.balancer.service.interval` | `60s` | Datanode DiskBalancer 服务检查不平衡并更新其配置的时间间隔。 |
+| `hdds.datanode.disk.balancer.stop.after.disk.even` | `true` | 如果为真,则一旦磁盘被视为平衡(即所有卷密度都在阈值内),DiskBalancer 将自动停止其平衡活动。 |
+| `hdds.datanode.disk.balancer.volume.choosing.policy` | `org.apache.hadoop.ozone.container.diskbalancer.policy.DefaultVolumeChoosingPolicy` | 用于选择平衡的源卷和目标卷的策略类。 |
+| `hdds.datanode.disk.balancer.container.choosing.policy` | `org.apache.hadoop.ozone.container.diskbalancer.policy.DefaultContainerChoosingPolicy` | 用于选择将哪些容器从源卷移动到目标卷的策略类。 |
+| `hdds.datanode.disk.balancer.service.timeout` | `300s` | Datanode DiskBalancer 服务操作超时。 |
+| `hdds.datanode.disk.balancer.should.run.default` | `false` | 如果平衡器无法读取其持久配置,则该值决定服务是否应默认运行。 |
 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
index 6abccae96be..24eaa586b31 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
@@ -51,6 +51,7 @@
 import java.util.stream.Stream;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.hdds.HddsConfigKeys;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.conf.ReconfigurationHandler;
@@ -1522,6 +1523,7 @@ public List<ContainerInfo> getListOfContainers(
   @Override
   public List<HddsProtos.DatanodeDiskBalancerInfoProto> getDiskBalancerReport(
       int count, int clientVersion) throws IOException {
+    checkDiskBalancerEnabled();
     return scm.getDiskBalancerManager().getDiskBalancerReport(count,
         clientVersion);
   }
@@ -1531,6 +1533,7 @@ public List<HddsProtos.DatanodeDiskBalancerInfoProto> getDiskBalancerStatus(
       Optional<List<String>> hosts,
       Optional<HddsProtos.DiskBalancerRunningStatus> status,
       int clientVersion) throws IOException {
+    checkDiskBalancerEnabled();
     return scm.getDiskBalancerManager().getDiskBalancerStatus(hosts, status,
         clientVersion);
   }
@@ -1540,6 +1543,8 @@ public List<DatanodeAdminError> startDiskBalancer(Optional<Double> threshold,
       Optional<Long> bandwidthInMB, Optional<Integer> parallelThread,
       Optional<Boolean> stopAfterDiskEven, Optional<List<String>> hosts)
       throws IOException {
+    checkDiskBalancerEnabled();
+
     try {
       getScm().checkAdminAccess(getRemoteUser(), false);
     } catch (IOException e) {
@@ -1554,6 +1559,8 @@ public List<DatanodeAdminError> startDiskBalancer(Optional<Double> threshold,
   @Override
   public List<DatanodeAdminError> stopDiskBalancer(Optional<List<String>> hosts)
       throws IOException {
+    checkDiskBalancerEnabled();
+
     try {
       getScm().checkAdminAccess(getRemoteUser(), false);
     } catch (IOException e) {
@@ -1568,6 +1575,8 @@ public List<DatanodeAdminError> updateDiskBalancerConfiguration(
       Optional<Double> threshold, Optional<Long> bandwidthInMB,
       Optional<Integer> parallelThread, Optional<Boolean> stopAfterDiskEven, Optional<List<String>> hosts)
       throws IOException {
+    checkDiskBalancerEnabled();
+
     try {
       getScm().checkAdminAccess(getRemoteUser(), false);
     } catch (IOException e) {
@@ -1579,6 +1588,14 @@ public List<DatanodeAdminError> updateDiskBalancerConfiguration(
         threshold, bandwidthInMB, parallelThread, stopAfterDiskEven, hosts);
   }
 
+  private void checkDiskBalancerEnabled() throws SCMException {
+    if (scm.getDiskBalancerManager() == null) {
+      throw new SCMException("Disk Balancer is not enabled. Please enable " +
+          "the '" + HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_KEY +
+          "' configuration key.");
+    }
+  }
+
   /**
    * Queries a list of Node that match a set of statuses.
    *
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
index a52963635a8..5fa56be25fc 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java
@@ -501,8 +501,6 @@ private void initializeEventHandlers() {
             scmNodeManager, containerManager, scmContext);
     PipelineActionHandler pipelineActionHandler =
         new PipelineActionHandler(pipelineManager, scmContext);
-    DiskBalancerReportHandler diskBalancerReportHandler =
-        new DiskBalancerReportHandler(diskBalancerManager);
 
     ReplicationManagerEventHandler replicationManagerEventHandler =
         new ReplicationManagerEventHandler(replicationManager, scmContext);
@@ -588,8 +586,13 @@ private void initializeEventHandlers() {
     scmNodeManager.registerSendCommandNotify(
         SCMCommandProto.Type.deleteBlocksCommand,
         scmBlockManager.getDeletedBlockLog()::onSent);
-    eventQueue.addHandler(SCMEvents.DISK_BALANCER_REPORT,
-        diskBalancerReportHandler);
+
+    if (diskBalancerManager != null) {
+      DiskBalancerReportHandler diskBalancerReportHandler =
+          new DiskBalancerReportHandler(diskBalancerManager);
+      eventQueue.addHandler(SCMEvents.DISK_BALANCER_REPORT,
+          diskBalancerReportHandler);
+    }
   }
 
   private void initializeCertificateClient() throws IOException {
@@ -857,8 +860,14 @@ private void initializeSystemManagers(OzoneConfiguration conf,
         .setSCMDBTransactionBuffer(scmHAManager.getDBTransactionBuffer())
         .setRatisServer(scmHAManager.getRatisServer())
         .build();
-    diskBalancerManager = new DiskBalancerManager(conf, eventQueue, scmContext,
-        scmNodeManager);
+    if (conf.getBoolean(HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_KEY,
+        HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_DEFAULT)) {
+      diskBalancerManager = new DiskBalancerManager(conf, eventQueue, scmContext,
+          scmNodeManager);
+    } else {
+      diskBalancerManager = null;
+      LOG.info("Disk Balancer is disabled.");
+    }
   }
 
   /**
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java
index bae430c66bb..e3aaf0d87b0 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDiskBalancerManager.java
@@ -55,6 +55,7 @@ public class TestDiskBalancerManager {
   @BeforeEach
   public void setup() throws Exception {
     conf = new OzoneConfiguration();
+    conf.setBoolean(HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_KEY, true);
     conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath());
     nodeManager = new MockNodeManager(true, 3);
     diskBalancerManager = new DiskBalancerManager(conf, new EventQueue(),
diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/diskbalancer.yaml b/hadoop-ozone/dist/src/main/compose/ozonesecure/diskbalancer.yaml
new file mode 100644
index 00000000000..6ea6f51f2bd
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/diskbalancer.yaml
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+x-disk-balancer-config:
+  &disk-balancer-config
+  environment:
+    - OZONE-SITE.XML_hdds.datanode.disk.balancer.enabled=true
+
+services:
+  scm:
+    <<: *disk-balancer-config
+  datanode:
+    <<: *disk-balancer-config
diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/test-diskbalancer.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure/test-diskbalancer.sh
new file mode 100644
index 00000000000..761e57f2875
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/test-diskbalancer.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#suite:diskbalancer
+
+set -u -o pipefail
+
+COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+export COMPOSE_DIR
+
+# shellcheck source=/dev/null
+source "$COMPOSE_DIR/../testlib.sh"
+
+export SECURITY_ENABLED=true
+export COMPOSE_FILE=docker-compose.yaml:diskbalancer.yaml
+
+start_docker_env
+
+execute_robot_test scm diskbalancer
diff --git a/hadoop-ozone/dist/src/main/smoketest/admincli/testdiskbalancer.robot b/hadoop-ozone/dist/src/main/smoketest/diskbalancer/testdiskbalancer.robot
similarity index 100%
rename from hadoop-ozone/dist/src/main/smoketest/admincli/testdiskbalancer.robot
rename to hadoop-ozone/dist/src/main/smoketest/diskbalancer/testdiskbalancer.robot
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
index f2c60d98996..cde2a7ce2ac 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancer.java
@@ -31,6 +31,7 @@
 import java.util.Optional;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
+import org.apache.hadoop.hdds.HddsConfigKeys;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -68,6 +69,7 @@ public class TestDiskBalancer {
   @BeforeAll
   public static void setup() throws Exception {
     ozoneConf = new OzoneConfiguration();
+    ozoneConf.setBoolean(HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_KEY, true);
     ozoneConf.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY,
         SCMContainerPlacementCapacity.class, PlacementPolicy.class);
     ozoneConf.setTimeDuration("hdds.datanode.disk.balancer.service.interval", 3, TimeUnit.SECONDS);
@@ -133,7 +135,7 @@ public void testDiskBalancerStopAfterEven() throws IOException,
               " the property StopAfterDiskEven is set to true"))
           .count();
       return count >= cluster.getHddsDatanodes().size();
-    }, 100, 5000); // check every 100ms, timeout after 5s
+    }, 100, 10000); // check every 100ms, timeout after 10s
   }
 
   @Test
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancerDuringDecommissionAndMaintenance.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancerDuringDecommissionAndMaintenance.java
index 3f684cecbd7..b33b119ce80 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancerDuringDecommissionAndMaintenance.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestDiskBalancerDuringDecommissionAndMaintenance.java
@@ -33,6 +33,7 @@
 import java.util.Optional;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
+import org.apache.hadoop.hdds.HddsConfigKeys;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -73,6 +74,7 @@ public class TestDiskBalancerDuringDecommissionAndMaintenance {
   @BeforeAll
   public static void setup() throws Exception {
     conf = new OzoneConfiguration();
+    conf.setBoolean(HddsConfigKeys.HDDS_DATANODE_DISK_BALANCER_ENABLED_KEY, true);
     conf.setClass(ScmConfigKeys.OZONE_SCM_CONTAINER_PLACEMENT_IMPL_KEY,
         SCMContainerPlacementCapacity.class, PlacementPolicy.class);
     conf.setTimeDuration("hdds.datanode.disk.balancer.service.interval", 2, TimeUnit.SECONDS);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

