This is an automated email from the ASF dual-hosted git repository.
erose pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 498a9c14ddc HDDS-13092. Container scanner should trigger volume scan when marking a container unhealthy (#8603)
498a9c14ddc is described below
commit 498a9c14ddc69c7f36dc4eb9c87b42443c6a3ca1
Author: Tejaskriya <[email protected]>
AuthorDate: Tue Jul 22 03:59:28 2025 +0530
HDDS-13092. Container scanner should trigger volume scan when marking a container unhealthy (#8603)
Co-authored-by: Doroszlai, Attila <[email protected]>
---
.../container/ozoneimpl/ContainerScanHelper.java | 26 ++++++++++++++++++----
.../TestBackgroundContainerDataScanner.java | 15 +++++++++++++
.../TestBackgroundContainerMetadataScanner.java | 17 ++++++++++++++
.../ozoneimpl/TestContainerScannersAbstract.java | 8 +++++++
.../ozoneimpl/TestOnDemandContainerScanner.java | 11 +++++++++
5 files changed, 73 insertions(+), 4 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java
index c0e16c7de93..4c4a45c55d4 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerScanHelper.java
@@ -26,6 +26,7 @@
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.common.interfaces.ScanResult;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.slf4j.Logger;
@@ -75,7 +76,7 @@ public void scanData(Container<?> container, DataTransferThrottler throttler, Ca
log.warn("Failed to update container checksum after scan of container {}", containerId, ex);
}
if (result.hasErrors()) {
- handleUnhealthyScanResult(containerId, result);
+ handleUnhealthyScanResult(containerData, result);
}
metrics.incNumContainersScanned();
}
@@ -103,7 +104,7 @@ public void scanMetadata(Container<?> container)
return;
}
if (result.hasErrors()) {
- handleUnhealthyScanResult(containerId, result);
+ handleUnhealthyScanResult(containerData, result);
}
Instant now = Instant.now();
@@ -114,8 +115,8 @@ public void scanMetadata(Container<?> container)
logScanCompleted(containerData, now);
}
- public void handleUnhealthyScanResult(long containerID, ScanResult result) throws IOException {
-
+ public void handleUnhealthyScanResult(ContainerData containerData, ScanResult result) throws IOException {
+ long containerID = containerData.getContainerID();
log.error("Corruption detected in container [{}]. Marking it UNHEALTHY. {}", containerID, result);
if (log.isDebugEnabled()) {
StringBuilder allErrorString = new StringBuilder();
@@ -130,6 +131,23 @@ public void handleUnhealthyScanResult(long containerID, ScanResult result) throw
boolean containerMarkedUnhealthy = controller.markContainerUnhealthy(containerID, result);
if (containerMarkedUnhealthy) {
metrics.incNumUnHealthyContainers();
+ // triggering a volume scan for the unhealthy container
+ triggerVolumeScan(containerData);
+ }
+ }
+
+ public void triggerVolumeScan(ContainerData containerData) {
+ HddsVolume volume = containerData.getVolume();
+ if (volume != null && !volume.isFailed()) {
+ log.info("Triggering scan of volume [{}] with unhealthy container [{}]",
+ volume, containerData.getContainerID());
+ StorageVolumeUtil.onFailure(volume);
+ } else if (volume == null) {
+ log.warn("Cannot trigger volume scan for container {} since its volume is null",
+ containerData.getContainerID());
+ } else {
+ log.debug("Skipping volume scan for container {} since its volume {} has failed.",
+ containerData.getContainerID(), volume);
}
}
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
index 93ef66680d8..508c472a7c8 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerDataScanner.java
@@ -32,6 +32,7 @@
import static org.mockito.Mockito.atMostOnce;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.eq;
+import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
@@ -48,9 +49,12 @@
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.hadoop.ozone.container.common.interfaces.ScanResult;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import org.mockito.MockedStatic;
import org.mockito.junit.jupiter.MockitoSettings;
import org.mockito.quality.Strictness;
@@ -133,6 +137,17 @@ public void testUnhealthyContainersDetected() throws
Exception {
verifyContainerMarkedUnhealthy(deletedContainer, never());
}
+ @Test
+ @Override
+ public void testUnhealthyContainersTriggersVolumeScan() throws Exception {
+ when(controller.markContainerUnhealthy(anyLong(),
any(ScanResult.class))).thenReturn(true);
+ try (MockedStatic<StorageVolumeUtil> mockedStatic =
mockStatic(StorageVolumeUtil.class)) {
+ scanner.runIteration();
+ verifyContainerMarkedUnhealthy(corruptData, atLeastOnce());
+ mockedStatic.verify(() ->
StorageVolumeUtil.onFailure(corruptData.getContainerData().getVolume()),
times(1));
+ }
+ }
+
@Test
public void testScanTimestampUpdated() throws Exception {
scanner.runIteration();
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
index abc3126f762..9b6c6aed3f0 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestBackgroundContainerMetadataScanner.java
@@ -32,7 +32,9 @@
import static org.mockito.Mockito.atMost;
import static org.mockito.Mockito.atMostOnce;
import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -45,9 +47,12 @@
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
+import org.apache.hadoop.ozone.container.common.interfaces.ScanResult;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.ozone.test.GenericTestUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import org.mockito.MockedStatic;
import org.mockito.junit.jupiter.MockitoSettings;
import org.mockito.quality.Strictness;
@@ -130,6 +135,18 @@ public void testUnhealthyContainersDetected() throws
Exception {
verifyContainerMarkedUnhealthy(openContainer, never());
}
+ @Test
+ @Override
+ public void testUnhealthyContainersTriggersVolumeScan() throws Exception {
+ when(controller.markContainerUnhealthy(anyLong(),
any(ScanResult.class))).thenReturn(true);
+ try (MockedStatic<StorageVolumeUtil> mockedStatic =
mockStatic(StorageVolumeUtil.class)) {
+ scanner.runIteration();
+ verifyContainerMarkedUnhealthy(openCorruptMetadata, atLeastOnce());
+ mockedStatic.verify(() ->
+
StorageVolumeUtil.onFailure(openCorruptMetadata.getContainerData().getVolume()),
times(1));
+ }
+ }
+
@Test
@Override
public void testUnhealthyContainerRescanned() throws Exception {
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
index f3537480970..7bd45c3b503 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerScannersAbstract.java
@@ -31,6 +31,7 @@
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
+import java.io.File;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
@@ -130,6 +131,9 @@ public abstract void
testPreviouslyScannedContainerIsScanned()
@Test
public abstract void testChecksumUpdateFailure() throws Exception;
+ @Test
+ public abstract void testUnhealthyContainersTriggersVolumeScan() throws
Exception;
+
// HELPER METHODS
protected void setScannedTimestampOld(Container<ContainerData> container) {
@@ -198,6 +202,10 @@ private ContainerController mockContainerController() {
MetadataScanResult healthyMetadata = getHealthyMetadataScanResult();
MetadataScanResult unhealthyMetadata = getUnhealthyMetadataScanResult();
+ File volLocation = mock(File.class);
+
when(volLocation.getPath()).thenReturn("/temp/volume-testcontainerscanner");
+ when(vol.getStorageDir()).thenReturn(volLocation);
+
// healthy container
ContainerTestUtils.setupMockContainer(healthy,
true, healthyMetadata, healthyData,
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerScanner.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerScanner.java
index 98a65ca761d..8bd8f2060ba 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerScanner.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestOnDemandContainerScanner.java
@@ -53,6 +53,7 @@
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.common.interfaces.ScanResult;
+import org.apache.ozone.test.GenericTestUtils.LogCapturer;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -347,6 +348,16 @@ public void testMerkleTreeWritten() throws Exception {
}
}
+ @Test
+ @Override
+ public void testUnhealthyContainersTriggersVolumeScan() throws Exception {
+ when(controller.markContainerUnhealthy(anyLong(),
any(ScanResult.class))).thenReturn(true);
+ LogCapturer logCapturer =
LogCapturer.captureLogs(OnDemandContainerScanner.class);
+ scanContainer(corruptData);
+ verifyContainerMarkedUnhealthy(corruptData, times(1));
+ assertTrue(logCapturer.getOutput().contains("Triggering scan of volume"));
+ }
+
private void scanContainer(Container<?> container) throws Exception {
Optional<Future<?>> scanFuture = onDemandScanner.scanContainer(container);
if (scanFuture.isPresent()) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]