This is an automated email from the ASF dual-hosted git repository.
sammichen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new a2e865a3a43 HDDS-13955. Handle empty datanode.id file gracefully (#9479)
a2e865a3a43 is described below
commit a2e865a3a439c6103dce6bbe8846df4d8e44362f
Author: Neo Chien <[email protected]>
AuthorDate: Mon Jan 5 13:31:41 2026 +0800
HDDS-13955. Handle empty datanode.id file gracefully (#9479)
---
.../apache/hadoop/ozone/HddsDatanodeService.java | 2 +-
.../container/common/helpers/ContainerUtils.java | 69 ++++++++++++++++++----
.../container/common/helpers/DatanodeIdYaml.java | 7 +++
.../container/common/utils/StorageVolumeUtil.java | 2 +-
.../common/helpers/TestContainerUtils.java | 50 ++++++++++++++--
.../datanode/schemaupgrade/UpgradeUtils.java | 2 +-
6 files changed, 113 insertions(+), 19 deletions(-)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
index 9bd10a83b3e..bb3810431e5 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
@@ -455,7 +455,7 @@ private DatanodeDetails initializeDatanodeDetails()
File idFile = new File(idFilePath);
DatanodeDetails details;
if (idFile.exists()) {
- details = ContainerUtils.readDatanodeDetailsFrom(idFile);
+ details = ContainerUtils.readDatanodeDetailsFrom(idFile, conf);
} else {
// There is no datanode.id file, this might be the first time datanode
// is started.
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
index 33d7dc9a324..7d16546fb69 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
@@ -34,7 +34,9 @@
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.Collection;
import java.util.Objects;
+import java.util.Properties;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -49,10 +51,12 @@
import
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
+import org.apache.hadoop.hdds.utils.HddsServerUtil;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.container.common.impl.ContainerData;
import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
+import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil;
import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
import org.slf4j.Logger;
@@ -166,25 +170,68 @@ public static synchronized void writeDatanodeDetailsTo(
* @return {@link DatanodeDetails}
* @throws IOException If the id file is malformed or other I/O exceptions
*/
- public static synchronized DatanodeDetails readDatanodeDetailsFrom(File path)
- throws IOException {
+ public static synchronized DatanodeDetails readDatanodeDetailsFrom(
+ File path, ConfigurationSource conf) throws IOException {
if (!path.exists()) {
throw new IOException("Datanode ID file not found.");
}
try {
return DatanodeIdYaml.readDatanodeIdFile(path);
} catch (IOException e) {
- LOG.warn("Error loading DatanodeDetails yaml from {}",
- path.getAbsolutePath(), e);
- // Try to load as protobuf before giving up
- try (InputStream in = Files.newInputStream(path.toPath())) {
- return DatanodeDetails.getFromProtoBuf(
- HddsProtos.DatanodeDetailsProto.parseFrom(in));
- } catch (IOException io) {
- throw new IOException("Failed to parse DatanodeDetails from "
- + path.getAbsolutePath(), io);
+ LOG.warn("Failed to read Datanode ID file as YAML. " +
+ "Attempting recovery.", e);
+ try {
+ return recoverDatanodeDetailsFromVersionFile(path, conf);
+ } catch (IOException recoveryEx) {
+ LOG.warn("Datanode ID recovery from VERSION file failed. " +
+ "Falling back to reading as Protobuf.", recoveryEx);
+ try {
+ return readDatanodeDetailsFromProto(path);
+ } catch (IOException io) {
+ throw new IOException("Failed to parse DatanodeDetails from "
+ + path.getAbsolutePath(), io);
+ }
+ }
+ }
+ }
+
+ /**
+  * Recovers {@link DatanodeDetails} from the datanode UUID stored in a
+  * volume VERSION file and rewrites the datanode ID file with it.
+  *
+  * <p>Each configured datanode storage directory is checked in order for a
+  * VERSION file. The first non-empty, well-formed datanode UUID found is
+  * used; empty or malformed values are skipped so that a later volume can
+  * still supply the ID. Only the UUID is set on the returned details.
+  *
+  * @param path the datanode ID file to recreate
+  * @param conf configuration used to locate the datanode storage directories
+  * @return details carrying the recovered UUID
+  * @throws IOException if no VERSION file yields a valid UUID, or if reading
+  *     a VERSION file or rewriting the ID file fails
+  */
+ private static DatanodeDetails recoverDatanodeDetailsFromVersionFile(
+     File path, ConfigurationSource conf) throws IOException {
+   LOG.info("Attempting to recover Datanode ID from VERSION file.");
+   UUID dnUuid = null;
+   Collection<String> dataNodeDirs =
+       HddsServerUtil.getDatanodeStorageDirs(conf);
+   for (String dataNodeDir : dataNodeDirs) {
+     File versionFile = new File(dataNodeDir, HddsVolume.HDDS_VOLUME_DIR +
+         "/" + StorageVolumeUtil.VERSION_FILE);
+     if (!versionFile.exists()) {
+       continue;
+     }
+     Properties props = DatanodeVersionFile.readFrom(versionFile);
+     String candidate = props.getProperty(OzoneConsts.DATANODE_UUID);
+     if (candidate == null || candidate.isEmpty()) {
+       continue;
+     }
+     try {
+       // Validate here: UUID.fromString throws an unchecked
+       // IllegalArgumentException, which would otherwise escape the caller's
+       // IOException handling and skip its protobuf fallback.
+       dnUuid = UUID.fromString(candidate);
+       break;
+     } catch (IllegalArgumentException e) {
+       LOG.warn("Ignoring malformed datanode UUID '{}' in {}",
+           candidate, versionFile, e);
+     }
+   }
+   if (dnUuid == null) {
+     throw new IOException("Could not find a valid datanode UUID from " +
+         "any VERSION file in " + dataNodeDirs);
+   }
+   DatanodeDetails.Builder builder = DatanodeDetails.newBuilder();
+   builder.setUuid(dnUuid);
+   DatanodeDetails datanodeDetails = builder.build();
+   DatanodeIdYaml.createDatanodeIdFile(datanodeDetails, path, conf);
+   LOG.info("Successfully recovered and rewrote datanode ID file.");
+   return datanodeDetails;
+ }
+
+ /**
+  * Parses the given datanode ID file as a serialized
+  * {@code HddsProtos.DatanodeDetailsProto} message.
+  *
+  * @param path the datanode ID file to read
+  * @return the details converted from the parsed protobuf message
+  * @throws IOException if opening or parsing the file fails
+  */
+ private static DatanodeDetails readDatanodeDetailsFromProto(File path)
+     throws IOException {
+   try (InputStream protoStream = Files.newInputStream(path.toPath())) {
+     HddsProtos.DatanodeDetailsProto parsed =
+         HddsProtos.DatanodeDetailsProto.parseFrom(protoStream);
+     return DatanodeDetails.getFromProtoBuf(parsed);
+   }
}
/**
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java
index d3fd432efef..07bdedb4398 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java
@@ -87,6 +87,13 @@ public static DatanodeDetails readDatanodeIdFile(File path)
throw new IOException("Unable to parse yaml file.", e);
}
+ if (datanodeDetailsYaml == null
+ || datanodeDetailsYaml.getUuid() == null
+ || datanodeDetailsYaml.getUuid().isEmpty()) {
+ throw new IOException(
+ "Datanode ID file is empty or has null UUID: " +
path.getAbsolutePath());
+ }
+
DatanodeDetails.Builder builder = DatanodeDetails.newBuilder();
builder.setUuid(UUID.fromString(datanodeDetailsYaml.getUuid()))
.setIpAddress(datanodeDetailsYaml.getIpAddress())
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java
index 5e6fe086a16..c71fc6cde6d 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/StorageVolumeUtil.java
@@ -43,7 +43,7 @@
*/
public final class StorageVolumeUtil {
- private static final String VERSION_FILE = "VERSION";
+ public static final String VERSION_FILE = "VERSION";
private static final String STORAGE_ID_PREFIX = "DS-";
private StorageVolumeUtil() {
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java
index 2a2d90ae18c..e262e795aa6 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/helpers/TestContainerUtils.java
@@ -26,6 +26,7 @@
import static
org.apache.hadoop.ozone.container.ContainerTestHelper.getDummyCommandRequestProto;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.when;
@@ -37,6 +38,7 @@
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
+import java.util.UUID;
import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@@ -45,6 +47,7 @@
import
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.ByteStringConversion;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.ozone.common.ChunkBuffer;
import org.apache.ratis.thirdparty.com.google.protobuf.TextFormat;
import org.junit.jupiter.api.BeforeEach;
@@ -125,13 +128,13 @@ public void testDatanodeIDPersistent(@TempDir File tempDir) throws Exception {
// Read should return an empty value if file doesn't exist
File nonExistFile = new File(tempDir, "non_exist.id");
assertThrows(IOException.class,
- () -> ContainerUtils.readDatanodeDetailsFrom(nonExistFile));
+ () -> ContainerUtils.readDatanodeDetailsFrom(nonExistFile, conf));
// Read should fail if the file is malformed
File malformedFile = new File(tempDir, "malformed.id");
createMalformedIDFile(malformedFile);
assertThrows(IOException.class,
- () -> ContainerUtils.readDatanodeDetailsFrom(malformedFile));
+ () -> ContainerUtils.readDatanodeDetailsFrom(malformedFile, conf));
// Test upgrade scenario - protobuf file instead of yaml
File protoFile = new File(tempDir, "valid-proto.id");
@@ -139,20 +142,57 @@ public void testDatanodeIDPersistent(@TempDir File tempDir) throws Exception {
HddsProtos.DatanodeDetailsProto proto = id1.getProtoBufMessage();
proto.writeTo(out);
}
- assertDetailsEquals(id1,
ContainerUtils.readDatanodeDetailsFrom(protoFile));
+ assertDetailsEquals(id1,
ContainerUtils.readDatanodeDetailsFrom(protoFile, conf));
id1.setInitialVersion(1);
assertWriteRead(tempDir, id1);
}
}
+ /**
+  * Verifies that reading an empty datanode ID file recovers the UUID from a
+  * volume VERSION file and rewrites the ID file so it can be read again.
+  */
+ @Test
+ public void testDatanodeIdRecovery(@TempDir File tempDir) throws IOException {
+   // Arrange: a storage directory whose hdds/VERSION file carries a known UUID
+   final String expectedUuid = UUID.randomUUID().toString();
+   final File volumeRoot = new File(tempDir, "datanode-storage");
+   assertTrue(volumeRoot.mkdirs());
+   conf.set(ScmConfigKeys.HDDS_DATANODE_DIR_KEY, volumeRoot.getAbsolutePath());
+
+   final File hddsDir = new File(volumeRoot, "hdds");
+   assertTrue(hddsDir.mkdirs());
+   new DatanodeVersionFile(
+       "storage-id", "cluster-id", expectedUuid, System.currentTimeMillis(), 0)
+       .createVersionFile(new File(hddsDir, "VERSION"));
+
+   // Arrange: a zero-length datanode.id stands in for the corrupted file
+   final File idFile = new File(tempDir, "datanode.id");
+   assertTrue(idFile.createNewFile());
+   assertEquals(0, idFile.length(), "Datanode ID file should be empty initially");
+
+   // Act: reading the empty ID file should trigger recovery
+   final DatanodeDetails recovered =
+       ContainerUtils.readDatanodeDetailsFrom(idFile, conf);
+
+   // Assert: the UUID comes from the VERSION file
+   assertEquals(expectedUuid, recovered.getUuidString());
+
+   // Assert: the ID file has been rewritten with content
+   assertTrue(idFile.length() > 0,
+       "Datanode ID file should have been recreated with content");
+
+   // Assert: a second read of the rewritten file yields the same UUID
+   final DatanodeDetails reread =
+       ContainerUtils.readDatanodeDetailsFrom(idFile, conf);
+   assertEquals(expectedUuid, reread.getUuidString());
+ }
+
private void assertWriteRead(@TempDir File tempDir,
DatanodeDetails details) throws IOException {
// Write a single ID to the file and read it out
File file = new File(tempDir, "valid-values.id");
ContainerUtils.writeDatanodeDetailsTo(details, file, conf);
- DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file);
+ DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file, conf);
assertDetailsEquals(details, read);
assertEquals(details.getCurrentVersion(), read.getCurrentVersion());
@@ -163,7 +203,7 @@ private void assertWriteReadWithChangedIpAddress(@TempDir File tempDir,
// Write a single ID to the file and read it out
File file = new File(tempDir, "valid-values.id");
ContainerUtils.writeDatanodeDetailsTo(details, file, conf);
- DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file);
+ DatanodeDetails read = ContainerUtils.readDatanodeDetailsFrom(file, conf);
assertEquals(details.getIpAddress(), read.getIpAddress());
read.validateDatanodeIpAddress();
assertEquals("127.0.0.1", read.getIpAddress());
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/datanode/schemaupgrade/UpgradeUtils.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/datanode/schemaupgrade/UpgradeUtils.java
index 09c2480e9ef..e447f101154 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/datanode/schemaupgrade/UpgradeUtils.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/datanode/schemaupgrade/UpgradeUtils.java
@@ -73,7 +73,7 @@ public static DatanodeDetails getDatanodeDetails(OzoneConfiguration conf)
File idFile = new File(idFilePath);
Preconditions.checkState(idFile.exists(),
"Datanode id file: " + idFilePath + " not exists");
- return ContainerUtils.readDatanodeDetailsFrom(idFile);
+ return ContainerUtils.readDatanodeDetailsFrom(idFile, conf);
}
public static File getVolumeUpgradeCompleteFile(HddsVolume volume) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]