This is an automated email from the ASF dual-hosted git repository.

sammichen pushed a commit to branch HDDS-5713
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/HDDS-5713 by this push:
     new 65a0cd0475b HDDS-14279. Double check selected container state before 
move process starts (#9575)
65a0cd0475b is described below

commit 65a0cd0475b2d74aeb857d799e2a207d1264be40
Author: Gargi Jaiswal <[email protected]>
AuthorDate: Mon Jan 5 11:02:37 2026 +0530

    HDDS-14279. Double check selected container state before move process 
starts (#9575)
---
 .../diskbalancer/DiskBalancerService.java          | 21 +++++-
 .../policy/DefaultContainerChoosingPolicy.java     |  5 ++
 .../diskbalancer/TestDiskBalancerTask.java         | 74 ++++++++++++++++++++++
 3 files changed, 97 insertions(+), 3 deletions(-)

diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
index 7490c5bb3ea..957f187491f 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/DiskBalancerService.java
@@ -45,7 +45,7 @@
 import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.fs.SpaceUsageSource;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import 
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.DiskBalancerRunningStatus;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
@@ -70,6 +70,7 @@
 import 
org.apache.hadoop.ozone.container.common.volume.VolumeChoosingPolicyFactory;
 import 
org.apache.hadoop.ozone.container.diskbalancer.DiskBalancerVolumeCalculation.VolumeFixedUsage;
 import 
org.apache.hadoop.ozone.container.diskbalancer.policy.ContainerChoosingPolicy;
+import 
org.apache.hadoop.ozone.container.diskbalancer.policy.DefaultContainerChoosingPolicy;
 import 
org.apache.hadoop.ozone.container.diskbalancer.policy.DiskBalancerVolumeChoosingPolicy;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 import 
org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerLocationUtil;
@@ -457,6 +458,20 @@ public BackgroundTaskResult call() {
         postCall(false, startTime);
         return BackgroundTaskResult.EmptyTaskResult.newResult();
       }
+
+      // Double check container state before acquiring lock to start move 
process.
+      // Container state may have changed after selection. Only CLOSED 
containers can be moved.
+      // QUASI_CLOSED is allowed when test mode is enabled, this is done to 
test in production
+      // these containers are rejected.
+      State containerState = container.getContainerData().getState();
+      boolean isTestMode = DefaultContainerChoosingPolicy.isTest();
+      if (containerState != State.CLOSED && !(isTestMode && containerState == 
State.QUASI_CLOSED)) {
+        LOG.warn("Container {} is in {} state, skipping move process. Only 
CLOSED containers can be moved.",
+            containerId, containerState);
+        postCall(false, startTime);
+        return BackgroundTaskResult.EmptyTaskResult.newResult();
+      }
+
       // hold read lock on the container first, to avoid other threads to 
update the container state,
       // such as block deletion.
       container.readLock();
@@ -477,8 +492,8 @@ public BackgroundTaskResult call() {
         // Before move the container directory to final place, the destination 
dir is empty and doesn't have
         // a metadata directory. Writing the .container file will fail as the 
metadata dir doesn't exist.
         // So we instead save the container file to the diskBalancerTmpDir.
-        ContainerProtos.ContainerDataProto.State originalState = 
tempContainerData.getState();
-        
tempContainerData.setState(ContainerProtos.ContainerDataProto.State.RECOVERING);
+        State originalState = tempContainerData.getState();
+        tempContainerData.setState(State.RECOVERING);
         // update tempContainerData volume to point to destVolume
         tempContainerData.setVolume(destVolume);
         // overwrite the .container file with the new state.
diff --git 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultContainerChoosingPolicy.java
 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultContainerChoosingPolicy.java
index 1f55a976548..f964f6f519e 100644
--- 
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultContainerChoosingPolicy.java
+++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/diskbalancer/policy/DefaultContainerChoosingPolicy.java
@@ -97,4 +97,9 @@ public ContainerData chooseContainer(OzoneContainer 
ozoneContainer,
   public static void setTest(boolean isTest) {
     test = isTest;
   }
+
+  @VisibleForTesting
+  public static boolean isTest() {
+    return test;
+  }
 }
diff --git 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerTask.java
 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerTask.java
index 76a3992658b..5e2df4eb392 100644
--- 
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerTask.java
+++ 
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/diskbalancer/TestDiskBalancerTask.java
@@ -82,6 +82,7 @@
 import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController;
 import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
 import org.apache.ozone.test.GenericTestUtils;
+import org.apache.ozone.test.GenericTestUtils.LogCapturer;
 import org.assertj.core.api.Fail;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Assertions;
@@ -602,6 +603,79 @@ public void 
testOldReplicaDelayedDeletion(ContainerTestVersionInfo versionInfo)
     assertFalse(oldContainerDir.exists());
   }
 
+  /**
+   * Testing that invalid states (including QUASI_CLOSED in production mode) 
are correctly rejected.
+   * Here, with QUASI_CLOSED state, we ensure that the test runs in production 
mode
+   * where QUASI_CLOSED is not allowed for move.
+   */
+  @ParameterizedTest
+  @EnumSource(names = {"OPEN", "CLOSING", "QUASI_CLOSED", "UNHEALTHY", 
"INVALID", "DELETED", "RECOVERING"})
+  public void testMoveSkippedWhenContainerStateChanged(State invalidState)
+      throws IOException, InterruptedException, TimeoutException {
+    LogCapturer serviceLog = 
LogCapturer.captureLogs(DiskBalancerService.class);
+
+    // Create a CLOSED container which will be selected by 
DefaultContainerChoosingPolicy
+    Container container = createContainer(CONTAINER_ID, sourceVolume, 
State.CLOSED);
+    long initialSourceUsed = sourceVolume.getCurrentUsage().getUsedSpace();
+    long initialDestUsed = destVolume.getCurrentUsage().getUsedSpace();
+    long initialDestCommitted = destVolume.getCommittedBytes();
+    long initialSourceDelta = 
diskBalancerService.getDeltaSizes().get(sourceVolume) == null ?
+        0L : diskBalancerService.getDeltaSizes().get(sourceVolume);
+    String oldContainerPath = container.getContainerData().getContainerPath();
+
+    // Verify temp container directory doesn't exist before task execution
+    Path tempContainerDir = destVolume.getTmpDir().toPath()
+        .resolve(DISK_BALANCER_DIR).resolve(String.valueOf(CONTAINER_ID));
+    assertFalse(Files.exists(tempContainerDir));
+
+    // Get the task (container is selected as CLOSED)
+    DiskBalancerService.DiskBalancerTask task = getTask();
+    assertNotNull(task);
+
+    // Change container state to invalid state (OPEN or DELETED) before move 
process starts
+    KeyValueContainerData containerData = (KeyValueContainerData) 
container.getContainerData();
+    containerData.setState(invalidState);
+
+    // Execute the task - it should skip the move due to invalid state
+    task.call();
+
+    // Verify that move process was skipped
+    GenericTestUtils.waitFor(() ->
+            serviceLog.getOutput().contains("skipping move process") &&
+            serviceLog.getOutput().contains(String.valueOf(CONTAINER_ID)) &&
+            serviceLog.getOutput().contains(invalidState.toString()),
+        100, 5000);
+
+    // Verify container is still in the original location
+    Container originalContainer = containerSet.getContainer(CONTAINER_ID);
+    assertNotNull(originalContainer);
+    assertEquals(container, originalContainer);
+    assertEquals(invalidState, originalContainer.getContainerState());
+    assertEquals(sourceVolume, 
originalContainer.getContainerData().getVolume());
+    assertTrue(new File(oldContainerPath).exists(), "Container should still 
exist in original location");
+
+    // Verify no temp directory was created
+    assertFalse(Files.exists(tempContainerDir), "Temp container directory 
should not be created");
+
+    // Verify volume usage is unchanged
+    assertEquals(initialSourceUsed, 
sourceVolume.getCurrentUsage().getUsedSpace());
+    assertEquals(initialDestUsed, destVolume.getCurrentUsage().getUsedSpace());
+
+    // Verify metrics show failure (since move was skipped)
+    assertEquals(1, diskBalancerService.getMetrics().getFailureCount());
+    assertEquals(0, diskBalancerService.getMetrics().getSuccessCount());
+    assertEquals(0, diskBalancerService.getMetrics().getSuccessBytes());
+
+    // Verify committed bytes are released
+    assertEquals(initialDestCommitted, destVolume.getCommittedBytes());
+
+    // Verify container is removed from in-progress set
+    
assertFalse(diskBalancerService.getInProgressContainers().contains(ContainerID.valueOf(CONTAINER_ID)));
+
+    // Verify delta sizes are restored
+    assertEquals(initialSourceDelta, 
diskBalancerService.getDeltaSizes().get(sourceVolume));
+  }
+
   private KeyValueContainer createContainer(long containerId, HddsVolume vol, 
State state)
       throws IOException {
     KeyValueContainerData containerData = new KeyValueContainerData(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to