This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 0d1ceb35ec2 HDDS-13771. Hard link list file structure should have all paths relative to metadata directory (#9132)
0d1ceb35ec2 is described below
commit 0d1ceb35ec29b62381270ff08e8de610fb734345
Author: Sadanand Shenoy <[email protected]>
AuthorDate: Tue Dec 9 17:14:59 2025 +0530
HDDS-13771. Hard link list file structure should have all paths relative to metadata directory (#9132)
---
.../java/org/apache/hadoop/ozone/OzoneConsts.java | 1 +
hadoop-hdds/framework/pom.xml | 4 +
.../hadoop/hdds/utils/RDBSnapshotProvider.java | 26 ++-
.../utils/db/InodeMetadataRocksDBCheckpoint.java | 150 ++++++++++++++
.../hadoop/hdds/utils/db/RocksDBCheckpoint.java | 2 +
.../ozone/recon/TestReconWithOzoneManagerHA.java | 4 +-
.../TestOMDbCheckpointServletInodeBasedXfer.java | 60 ++----
.../hadoop/ozone/om/TestOMRatisSnapshots.java | 91 ++++++++-
.../snapshot/TestOzoneManagerSnapshotProvider.java | 17 +-
.../om/OMDBCheckpointServletInodeBasedXfer.java | 8 -
.../org/apache/hadoop/ozone/om/OzoneManager.java | 225 ++++++++++++++-------
.../om/ratis_snapshot/OmRatisSnapshotProvider.java | 8 +
.../hadoop/ozone/om/snapshot/OmSnapshotUtils.java | 63 +-----
.../hadoop/ozone/om/TestOmSnapshotManager.java | 19 +-
.../defrag/TestInodeMetadataRocksDBCheckpoint.java | 76 +++++++
.../spi/impl/OzoneManagerServiceProviderImpl.java | 11 +-
16 files changed, 547 insertions(+), 218 deletions(-)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
index d9f25f28de3..df8772d36f8 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
@@ -564,6 +564,7 @@ public final class OzoneConsts {
* the OMDBCheckpoint functions.
*/
public static final String OM_SNAPSHOT_SEPARATOR = "-";
+ public static final String HARDLINK_SEPARATOR = "\t";
private OzoneConsts() {
// Never Constructed
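For reference, each line of the hard link list that this separator delimits pairs the link to create with the file it points at; after this change both sides are relative to the OM metadata directory. A minimal sketch with illustrative names:

    // One entry of the hard link list: "<link-to-create>" TAB "<existing-file>"
    String line = "om.db/000123.sst" + OzoneConsts.HARDLINK_SEPARATOR + "source.sst";
    String[] parts = line.split(OzoneConsts.HARDLINK_SEPARATOR);
    String linkToCreate = parts[0]; // resolved against the untarred directory
    String existingFile = parts[1]; // likewise relative to that directory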
diff --git a/hadoop-hdds/framework/pom.xml b/hadoop-hdds/framework/pom.xml
index c30cde6de28..7f4f0070cae 100644
--- a/hadoop-hdds/framework/pom.xml
+++ b/hadoop-hdds/framework/pom.xml
@@ -112,6 +112,10 @@
<groupId>jakarta.annotation</groupId>
<artifactId>jakarta.annotation-api</artifactId>
</dependency>
+ <dependency>
+ <groupId>jakarta.validation</groupId>
+ <artifactId>jakarta.validation-api</artifactId>
+ </dependency>
<dependency>
<groupId>jakarta.ws.rs</groupId>
<artifactId>jakarta.ws.rs-api</artifactId>
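A note on this addition: the new jakarta.validation-api dependency appears to back the @NotNull annotation introduced on RocksDBCheckpoint below (the import there is javax.validation.constraints.NotNull, the package the 2.x line of this artifact provides).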
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java
index b40a1f84e15..1b00571f72e 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/RDBSnapshotProvider.java
@@ -121,13 +121,13 @@ public DBCheckpoint downloadDBSnapshotFromLeader(String leaderNodeID)
numDownloaded.incrementAndGet();
injectPause();
- RocksDBCheckpoint checkpoint = getCheckpointFromSnapshotFile(targetFile,
+ Path unTarredDb = untarContentsOfTarball(targetFile,
candidateDir, true);
LOG.info("Successfully untar the downloaded snapshot {} at {}.",
- targetFile, checkpoint.getCheckpointLocation());
- if (ratisSnapshotComplete(checkpoint.getCheckpointLocation())) {
+ targetFile, unTarredDb.toAbsolutePath());
+ if (ratisSnapshotComplete(unTarredDb)) {
LOG.info("Ratis snapshot transfer is complete.");
- return checkpoint;
+ return getCheckpointFromUntarredDb(unTarredDb);
}
}
}
@@ -175,15 +175,25 @@ public String getSnapshotFileName(String leaderNodeID) {
}
/**
- * Untar the downloaded snapshot and convert to {@link RocksDBCheckpoint}.
+ * Convert the untarred DB directory to a {@link RocksDBCheckpoint}.
*
+ * @return {@link RocksDBCheckpoint}
+ * @throws IOException
+ */
+ public DBCheckpoint getCheckpointFromUntarredDb(Path untarredDbDir) throws IOException {
+ return new RocksDBCheckpoint(untarredDbDir);
+ }
+
+ /**
+ *
+ * Untar the downloaded snapshot.
* @param snapshot the downloaded snapshot tar file
* @param untarDir the directory to place the untarred files
* @param deleteSnapshot whether to delete the downloaded snapshot tar file
- * @return {@link RocksDBCheckpoint}
+ * @return path of the untarred DB directory.
* @throws IOException
*/
- public RocksDBCheckpoint getCheckpointFromSnapshotFile(File snapshot,
+ private Path untarContentsOfTarball(File snapshot,
File untarDir, boolean deleteSnapshot) throws IOException {
// Untar the checkpoint file.
Path untarredDbDir = untarDir.toPath();
@@ -192,7 +202,7 @@ public RocksDBCheckpoint getCheckpointFromSnapshotFile(File snapshot,
if (deleteSnapshot) {
FileUtil.fullyDelete(snapshot);
}
- return new RocksDBCheckpoint(untarredDbDir);
+ return untarredDbDir;
}
/**
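The refactor above splits leader-snapshot handling into two steps: untar the tarball, then turn the directory into a DBCheckpoint via an overridable factory method. A minimal sketch of a subclass hooking that conversion, assuming the InodeMetadataRocksDBCheckpoint class added below (this is exactly what OmRatisSnapshotProvider does later in this patch):

    @Override
    public DBCheckpoint getCheckpointFromUntarredDb(Path untarredDbDir) throws IOException {
      // Installs the hard links listed in the tarball's hardLinkFile on construction.
      return new InodeMetadataRocksDBCheckpoint(untarredDbDir);
    }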
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/InodeMetadataRocksDBCheckpoint.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/InodeMetadataRocksDBCheckpoint.java
new file mode 100644
index 00000000000..f70e4b2a515
--- /dev/null
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/InodeMetadataRocksDBCheckpoint.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.utils.db;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.ozone.OzoneConsts;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * RocksDB checkpoint implementation that uses hardlinks to optimize disk space
+ * for inode-based metadata checkpoints.
+ *
+ * <p>During construction, reads a hardlink mapping file and creates hardlinks
+ * from checkpoint files to the checkpoint_data directory. Original files are
+ * then deleted since they're accessible via hardlinks, saving disk space while
+ * maintaining checkpoint functionality.
+ * </p>
+ */
+public class InodeMetadataRocksDBCheckpoint implements DBCheckpoint {
+
+ private final Path checkpointLocation;
+ private final long checkpointTimestamp = System.currentTimeMillis();
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(InodeMetadataRocksDBCheckpoint.class);
+
+ public static final String OM_HARDLINK_FILE = "hardLinkFile";
+
+ public InodeMetadataRocksDBCheckpoint(Path checkpointLocation) throws IOException {
+ this.checkpointLocation = checkpointLocation;
+ installHardLinks();
+ }
+
+ @Override
+ public Path getCheckpointLocation() {
+ return this.checkpointLocation;
+ }
+
+ @Override
+ public long getCheckpointTimestamp() {
+ return this.checkpointTimestamp;
+ }
+
+ @Override
+ public long getLatestSequenceNumber() {
+ return -1;
+ }
+
+ @Override
+ public long checkpointCreationTimeTaken() {
+ return 0L;
+ }
+
+ @Override
+ public void cleanupCheckpoint() throws IOException {
+ LOG.info("Cleaning up RocksDB checkpoint at {}",
+ checkpointLocation.toString());
+ FileUtils.deleteDirectory(checkpointLocation.toFile());
+ }
+
+ private void installHardLinks() throws IOException {
+ File hardLinkFile = new File(checkpointLocation.toFile(),
+ OM_HARDLINK_FILE);
+
+ if (!hardLinkFile.exists()) {
+ LOG.error("Hardlink file : {} does not exist.", hardLinkFile);
+ return;
+ }
+
+ // Track source files that need to be deleted after hardlink creation
+ Set<Path> sourceFilesToDelete = new HashSet<>();
+
+ // Read file and create hardlinks directly in checkpointLocation
+ try (Stream<String> s = Files.lines(hardLinkFile.toPath())) {
+ List<String> lines = s.collect(Collectors.toList());
+
+ // Create hardlinks directly in checkpointLocation
+ for (String l : lines) {
+ String[] parts = l.split(OzoneConsts.HARDLINK_SEPARATOR);
+ if (parts.length != 2) {
+ LOG.warn("Skipping malformed line in hardlink file: {}", l);
+ continue;
+ }
+ String to = parts[0]; // Destination path (relative)
+ String from = parts[1]; // Source path (relative to checkpointLocation)
+
+ Path sourcePath = checkpointLocation.resolve(from).toAbsolutePath();
+ Path targetPath = checkpointLocation.resolve(to).toAbsolutePath();
+
+ // Track source file for later deletion
+ if (Files.exists(sourcePath)) {
+ sourceFilesToDelete.add(sourcePath);
+ }
+
+ // Make parent directory if it doesn't exist
+ Path parent = targetPath.getParent();
+ if (parent != null && !Files.exists(parent)) {
+ Files.createDirectories(parent);
+ }
+
+ // Create hardlink directly in checkpointLocation
+ Files.createLink(targetPath, sourcePath);
+ }
+
+ // Delete hardlink file
+ if (!hardLinkFile.delete()) {
+ throw new IOException("Failed to delete: " + hardLinkFile);
+ }
+
+ // Delete all source files after hardlinks are created
+ for (Path sourcePath : sourceFilesToDelete) {
+ try {
+ if (Files.isDirectory(sourcePath)) {
+ FileUtils.deleteDirectory(sourcePath.toFile());
+ } else {
+ Files.delete(sourcePath);
+ }
+ } catch (IOException e) {
+ LOG.warn("Failed to delete source file {}: {}", sourcePath, e.getMessage());
+ // Continue with other files
+ }
+ }
+ }
+ }
+}
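To make the construction-time behavior concrete, a usage sketch with an illustrative hard link list (its shape matches the unit test added later in this patch; the directory location is made up):

    // <untarredDir>/hardLinkFile, tab-separated, paths relative to untarredDir:
    //   om.db/target1.sst<TAB>source.sst
    //   target2.sst<TAB>source.sst
    Path untarredDir = Paths.get("/tmp/om-candidate"); // illustrative
    DBCheckpoint checkpoint = new InodeMetadataRocksDBCheckpoint(untarredDir);
    // After construction, om.db/target1.sst and target2.sst exist as hard
    // links, while source.sst and the hardLinkFile itself have been deleted.
    Path root = checkpoint.getCheckpointLocation();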
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDBCheckpoint.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDBCheckpoint.java
index 58e7069d143..dfa1bd9c3e9 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDBCheckpoint.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDBCheckpoint.java
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.nio.file.Path;
+import javax.validation.constraints.NotNull;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -51,6 +52,7 @@ public RocksDBCheckpoint(Path checkpointLocation,
}
@Override
+ @NotNull
public Path getCheckpointLocation() {
return this.checkpointLocation;
}
diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java
index 254a9f39256..790932f2568 100644
--- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java
+++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.ozone.recon;
import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
+import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -119,7 +119,7 @@ public void testReconGetsSnapshotFromLeader() throws Exception {
String expectedUrl = "http://" +
(hostname.equals("0.0.0.0") ? "localhost" : hostname) + ":" +
ozoneManager.get().getHttpServer().getHttpAddress().getPort() +
- OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
+ OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
String snapshotUrl = impl.getOzoneManagerSnapshotUrl();
assertEquals(expectedUrl, snapshotUrl);
// Write some data
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java
index 942bbb746f0..41a9f214ee7 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java
@@ -96,6 +96,7 @@
import org.apache.hadoop.hdds.utils.IOUtils;
import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
import org.apache.hadoop.hdds.utils.db.DBStore;
+import org.apache.hadoop.hdds.utils.db.InodeMetadataRocksDBCheckpoint;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.TestDataUtil;
@@ -353,8 +354,9 @@ public void testContentsOfTarballWithSnapshot(boolean includeSnapshot) throws Ex
assertEquals(numSnapshots, actualYamlFiles,
"Number of generated YAML files should match the number of snapshots.");
- // create hardlinks now
- OmSnapshotUtils.createHardLinks(newDbDir.toPath(), true);
+ InodeMetadataRocksDBCheckpoint obtainedCheckpoint =
+ new InodeMetadataRocksDBCheckpoint(newDbDir.toPath());
+ assertNotNull(obtainedCheckpoint);
if (includeSnapshot) {
List<String> yamlRelativePaths = snapshotPaths.stream().map(path -> {
@@ -366,7 +368,8 @@ public void testContentsOfTarballWithSnapshot(boolean includeSnapshot) throws Ex
}).collect(Collectors.toList());
for (String yamlRelativePath : yamlRelativePaths) {
- String yamlFileName = Paths.get(newDbDir.getPath(), yamlRelativePath).toString();
+ String yamlFileName = Paths.get(newDbDir.getPath(),
+ yamlRelativePath).toString();
assertTrue(Files.exists(Paths.get(yamlFileName)));
}
}
@@ -397,14 +400,11 @@ public void testSnapshotDBConsistency() throws Exception {
File newDbDir = new File(newDbDirName);
assertTrue(newDbDir.mkdirs());
FileUtil.unTar(tempFile, newDbDir);
- Set<Path> allPathsInTarball = getAllPathsInTarball(newDbDir);
- // create hardlinks now
- OmSnapshotUtils.createHardLinks(newDbDir.toPath(), false);
- for (Path old : allPathsInTarball) {
- assertTrue(old.toFile().delete());
- }
- Path snapshotDbDir = Paths.get(newDbDir.toPath().toString(), OM_SNAPSHOT_CHECKPOINT_DIR,
- OM_DB_NAME + "-" + snapshotToModify.getSnapshotId());
+ InodeMetadataRocksDBCheckpoint obtainedCheckpoint =
+ new InodeMetadataRocksDBCheckpoint(newDbDir.toPath());
+ assertNotNull(obtainedCheckpoint);
+ Path snapshotDbDir = Paths.get(newDbDir.getPath(),
+ OM_SNAPSHOT_CHECKPOINT_DIR, OM_DB_NAME + "-" + snapshotToModify.getSnapshotId());
assertTrue(Files.exists(snapshotDbDir));
String value = getValueFromSnapshotDeleteTable(dummyKey, snapshotDbDir.toString());
assertNotNull(value);
@@ -590,10 +590,12 @@ public void testBootstrapOnFollowerConsistency() throws Exception {
File newDbDir = new File(newDbDirName);
assertTrue(newDbDir.mkdirs());
FileUtil.unTar(tempFile, newDbDir);
- OmSnapshotUtils.createHardLinks(newDbDir.toPath(), true);
- Path snapshot1DbDir = Paths.get(newDbDir.toPath().toString(), OM_SNAPSHOT_CHECKPOINT_DIR,
+ InodeMetadataRocksDBCheckpoint obtainedCheckpoint =
+ new InodeMetadataRocksDBCheckpoint(newDbDir.toPath());
+ assertNotNull(obtainedCheckpoint);
+ Path snapshot1DbDir = Paths.get(newDbDir.getPath(), OM_SNAPSHOT_CHECKPOINT_DIR,
OM_DB_NAME + "-" + snapshot1.getSnapshotId());
- Path snapshot2DbDir = Paths.get(newDbDir.toPath().toString(), OM_SNAPSHOT_CHECKPOINT_DIR,
+ Path snapshot2DbDir = Paths.get(newDbDir.getPath(), OM_SNAPSHOT_CHECKPOINT_DIR,
OM_DB_NAME + "-" + snapshot2.getSnapshotId());
assertTrue(purgeEndTime.get() >= checkpointEndTime.get(),
"Purge should complete after checkpoint releases snapshot cache lock");
@@ -821,10 +823,12 @@ public void testCheckpointIncludesSnapshotsFromFrozenState() throws Exception {
File newDbDir = new File(newDbDirName);
assertTrue(newDbDir.mkdirs());
FileUtil.unTar(tempFile, newDbDir);
- OmSnapshotUtils.createHardLinks(newDbDir.toPath(), true);
- Path snapshot1DbDir = Paths.get(newDbDir.toPath().toString(), OM_SNAPSHOT_CHECKPOINT_DIR,
+ InodeMetadataRocksDBCheckpoint obtainedCheckpoint =
+ new InodeMetadataRocksDBCheckpoint(newDbDir.toPath());
+ assertNotNull(obtainedCheckpoint);
+ Path snapshot1DbDir = Paths.get(newDbDir.getPath(), OM_SNAPSHOT_CHECKPOINT_DIR,
OM_DB_NAME + "-" + snapshot1.getSnapshotId());
- Path snapshot2DbDir = Paths.get(newDbDir.toPath().toString(), OM_SNAPSHOT_CHECKPOINT_DIR,
+ Path snapshot2DbDir = Paths.get(newDbDir.getPath(), OM_SNAPSHOT_CHECKPOINT_DIR,
OM_DB_NAME + "-" + snapshot2.getSnapshotId());
boolean snapshot1IncludedInCheckpoint = Files.exists(snapshot1DbDir);
boolean snapshot2IncludedInCheckpoint = Files.exists(snapshot2DbDir);
@@ -836,21 +840,6 @@ public void testCheckpointIncludesSnapshotsFromFrozenState() throws Exception {
}
}
- private static Set<Path> getAllPathsInTarball(File newDbDir) throws IOException {
- Set<Path> allPathsInTarball = new HashSet<>();
- try (Stream<Path> filesInTarball = Files.list(newDbDir.toPath())) {
- List<Path> files = filesInTarball.collect(Collectors.toList());
- for (Path p : files) {
- File file = p.toFile();
- if (file.getName().equals(OmSnapshotManager.OM_HARDLINK_FILE)) {
- continue;
- }
- allPathsInTarball.add(p);
- }
- }
- return allPathsInTarball;
- }
-
private void writeDummyKeyToDeleteTableOfSnapshotDB(OzoneSnapshot
snapshotToModify, String bucketName,
String volumeName, String keyName)
throws IOException {
@@ -969,13 +958,6 @@ private void populateInodesOfFilesInDirectory(DBStore dbStore, Path dbLocation,
if (path.contains(OM_CHECKPOINT_DIR)) {
path =
metadataDir.relativize(dbStore.getDbLocation().toPath().resolve(p.getFileName())).toString();
}
- if (path.startsWith(OM_DB_NAME)) {
- Path fileName = Paths.get(path).getFileName();
- // fileName will not be null, added null check for findbugs
- if (fileName != null) {
- path = fileName.toString();
- }
- }
hardlinkMap.computeIfAbsent(inode, k -> new ArrayList<>()).add(path);
inodesFromOmDbCheckpoint.add(inode);
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
index 65fd0b5dfb4..c52aa5c91e4 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
@@ -65,6 +65,7 @@
import org.apache.hadoop.hdds.utils.HAUtils;
import org.apache.hadoop.hdds.utils.TransactionInfo;
import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
+import org.apache.hadoop.hdds.utils.db.InodeMetadataRocksDBCheckpoint;
import org.apache.hadoop.hdds.utils.db.RDBCheckpointUtils;
import org.apache.hadoop.hdds.utils.db.RDBStore;
import org.apache.hadoop.ozone.MiniOzoneCluster;
@@ -86,7 +87,6 @@
import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer;
import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServerConfig;
import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils;
-import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils;
import org.apache.hadoop.utils.FaultInjectorImpl;
import org.apache.ozone.test.GenericTestUtils;
import org.apache.ozone.test.GenericTestUtils.LogCapturer;
@@ -101,6 +101,7 @@
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.slf4j.event.Level;
/**
@@ -113,6 +114,9 @@ public class TestOMRatisSnapshots {
private static final String OM_SERVICE_ID = "om-service-test1";
private static final int NUM_OF_OMS = 3;
+ private static final Logger LOG =
+ LoggerFactory.getLogger(TestOMRatisSnapshots.class);
+
private MiniOzoneHAClusterImpl cluster = null;
private ObjectStore objectStore;
private OzoneConfiguration conf;
@@ -965,8 +969,14 @@ public void testInstallOldCheckpointFailure() throws Exception {
// followerOM is already ahead of that transactionLogIndex and the OM
// state should be reloaded.
TermIndex followerTermIndex =
followerRatisServer.getLastAppliedTermIndex();
+ Path leaderCheckpointLocation = leaderDbCheckpoint.getCheckpointLocation();
+ assertNotNull(leaderCheckpointLocation);
+ Path omDbDir = leaderCheckpointLocation.resolve(OM_DB_NAME);
+ assertTrue(omDbDir.toFile().mkdir());
+ moveCheckpointContentsToOmDbDir(leaderCheckpointLocation, omDbDir);
+
TermIndex newTermIndex = followerOM.installCheckpoint(
- leaderOMNodeId, leaderDbCheckpoint);
+ leaderOMNodeId, leaderCheckpointLocation);
String errorMsg = "Cannot proceed with InstallSnapshot as OM is at " +
"TermIndex " + followerTermIndex + " and checkpoint has lower " +
@@ -1006,14 +1016,18 @@ public void testInstallCorruptedCheckpointFailure() throws Exception {
DBCheckpoint leaderDbCheckpoint = leaderOM.getMetadataManager().getStore()
.getCheckpoint(false);
Path leaderCheckpointLocation = leaderDbCheckpoint.getCheckpointLocation();
- OmSnapshotUtils.createHardLinks(leaderCheckpointLocation, true);
+ assertNotNull(leaderCheckpointLocation);
+ Path omDbDir = leaderCheckpointLocation.resolve(OM_DB_NAME);
+ assertTrue(omDbDir.toFile().mkdir());
+ moveCheckpointContentsToOmDbDir(leaderCheckpointLocation, omDbDir);
+
TransactionInfo leaderCheckpointTrxnInfo = OzoneManagerRatisUtils
- .getTrxnInfoFromCheckpoint(conf, leaderCheckpointLocation);
+ .getTrxnInfoFromCheckpoint(conf, omDbDir);
// Corrupt the leader checkpoint and install that on the OM. The
// operation should fail and OM should shutdown.
boolean delete = true;
- File[] files = leaderCheckpointLocation.toFile().listFiles();
+ File[] files = omDbDir.toFile().listFiles();
assertNotNull(files);
for (File file : files) {
if (file.getName().contains(".sst")) {
@@ -1040,6 +1054,61 @@ public void testInstallCorruptedCheckpointFailure() throws Exception {
assertLogCapture(logCapture, msg);
}
+ /**
+ * Moves all contents from the checkpoint location into the omDbDir.
+ * This reorganizes the checkpoint structure so that all checkpoint files
+ * are contained within the om.db directory.
+ *
+ * @param checkpointLocation the source checkpoint location containing files/directories
+ * @param omDbDir the target directory (om.db) where contents should be moved
+ * @throws IOException if file operations fail
+ */
+ private void moveCheckpointContentsToOmDbDir(Path checkpointLocation, Path omDbDir)
+ throws IOException {
+ File checkpointLocationFile = checkpointLocation.toFile();
+ File omDbDirFile = omDbDir.toFile();
+
+ // Ensure omDbDir exists
+ if (!omDbDirFile.exists()) {
+ if (!omDbDirFile.mkdirs()) {
+ throw new IOException("Failed to create directory: " + omDbDir);
+ }
+ }
+
+ if (!checkpointLocationFile.exists() || !checkpointLocationFile.isDirectory()) {
+ throw new IOException("Checkpoint location does not exist or is not a directory: " + checkpointLocation);
+ }
+
+ // Move all contents from checkpointLocation to omDbDir
+ File[] contents = checkpointLocationFile.listFiles();
+ if (contents != null) {
+ for (File item : contents) {
+ String name = item != null ? item.getName() : null;
+ Path fileName = omDbDir.getFileName();
+ // Skip the target directory itself if it already exists in source
+ assertNotNull(name);
+ assertNotNull(fileName);
+ if (name.equals(fileName.toString())) {
+ continue;
+ }
+
+ Path targetPath = omDbDir.resolve(item.getName());
+
+ // Delete target if it exists
+ if (Files.exists(targetPath)) {
+ if (Files.isDirectory(targetPath)) {
+ FileUtils.deleteDirectory(targetPath.toFile());
+ } else {
+ Files.delete(targetPath);
+ }
+ }
+
+ // Move item to target - Files.move handles both files and directories recursively
+ Files.move(item.toPath(), targetPath);
+ }
+ }
+ }
+
private SnapshotInfo createOzoneSnapshot(OzoneManager leaderOM, String name)
throws IOException {
objectStore.createSnapshot(volumeName, bucketName, name);
@@ -1162,6 +1231,7 @@ public void pause() throws IOException {
// tarballs.
if (count == 1) {
long sstSize = getSizeOfSstFiles(tarball);
+ LOG.info("Setting ozone.om.ratis.snapshot.max.total.sst.size to {}", sstSize);
om.getConfiguration().setLong(
OZONE_OM_RATIS_SNAPSHOT_MAX_TOTAL_SST_SIZE_KEY, sstSize / 2);
// Now empty the tarball to restart the download
@@ -1177,15 +1247,22 @@ public void pause() throws IOException {
// Get Size of sstfiles in tarball.
private long getSizeOfSstFiles(File tarball) throws IOException {
FileUtil.unTar(tarball, tempDir.toFile());
- OmSnapshotUtils.createHardLinks(tempDir, true);
- List<Path> sstPaths = Files.list(tempDir).collect(Collectors.toList());
+ InodeMetadataRocksDBCheckpoint obtainedCheckpoint =
+ new InodeMetadataRocksDBCheckpoint(tempDir);
+ assertNotNull(obtainedCheckpoint);
+ Path omDbDir = Paths.get(obtainedCheckpoint.getCheckpointLocation().toString(), OM_DB_NAME);
+ assertNotNull(omDbDir);
+ List<Path> sstPaths = Files.list(omDbDir).collect(Collectors.toList());
long totalFileSize = 0;
+ int numFiles = 0;
for (Path sstPath : sstPaths) {
File file = sstPath.toFile();
if (file.isFile() && file.getName().endsWith(".sst")) {
totalFileSize += Files.size(sstPath);
+ numFiles++;
}
}
+ LOG.info("Total num files {}", numFiles);
return totalFileSize;
}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerSnapshotProvider.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerSnapshotProvider.java
index 211d6e61db8..53f9a7a7d13 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerSnapshotProvider.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestOzoneManagerSnapshotProvider.java
@@ -18,14 +18,19 @@
package org.apache.hadoop.ozone.om.snapshot;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.utils.IOUtils;
import org.apache.hadoop.hdds.utils.TransactionInfo;
import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
+import org.apache.hadoop.hdds.utils.db.InodeMetadataRocksDBCheckpoint;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
+import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.client.ObjectStore;
import org.apache.hadoop.ozone.client.OzoneClient;
import org.apache.hadoop.ozone.client.OzoneClientFactory;
@@ -117,12 +122,18 @@ public void testDownloadCheckpoint() throws Exception {
private long getDownloadedSnapshotIndex(DBCheckpoint dbCheckpoint)
throws Exception {
-
- OmSnapshotUtils.createHardLinks(dbCheckpoint.getCheckpointLocation(), true);
+ Path checkpointLocation = dbCheckpoint.getCheckpointLocation();
+ assertNotNull(checkpointLocation);
+ InodeMetadataRocksDBCheckpoint obtainedCheckpoint =
+ new InodeMetadataRocksDBCheckpoint(checkpointLocation);
+ assertNotNull(obtainedCheckpoint);
+ Path omDbLocation = Paths.get(checkpointLocation.toString(),
+ OzoneConsts.OM_DB_NAME
+ );
TransactionInfo trxnInfoFromCheckpoint =
OzoneManagerRatisUtils.getTrxnInfoFromCheckpoint(conf,
- dbCheckpoint.getCheckpointLocation());
+ omDbLocation);
return trxnInfoFromCheckpoint.getTransactionIndex();
}
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
index 1a5b015b2bd..7e8a2e1c96f 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
@@ -22,7 +22,6 @@
import static org.apache.hadoop.hdds.utils.Archiver.tar;
import static org.apache.hadoop.hdds.utils.HddsServerUtil.includeRatisSnapshotCompleteFlag;
import static org.apache.hadoop.ozone.OzoneConsts.OM_CHECKPOINT_DIR;
-import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST;
import static org.apache.hadoop.ozone.OzoneConsts.ROCKSDB_SST_SUFFIX;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_SNAPSHOT_MAX_TOTAL_SST_SIZE_DEFAULT;
@@ -408,13 +407,6 @@ private static void writeHardlinkFile(OzoneConfiguration conf, Map<String, Strin
Path p = Paths.get(entry.getKey());
String fileId = entry.getValue();
Path relativePath = metaDirPath.relativize(p);
- // if the file is in "om.db" directory, strip off the 'om.db' name from the path
- // and only keep the file name as this would be created in the current dir of the untarred dir
- // on the follower.
- if (relativePath.startsWith(OM_DB_NAME)) {
- relativePath = relativePath.getFileName();
- }
sb.append(relativePath).append('\t').append(fileId).append('\n');
}
Files.write(data, sb.toString().getBytes(StandardCharsets.UTF_8),
StandardOpenOption.TRUNCATE_EXISTING);
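With the om.db special case gone, every entry keeps its full metadata-relative path. A small sketch of the difference, with illustrative paths:

    Path metaDirPath = Paths.get("/data/metadata"); // illustrative
    Path p = Paths.get("/data/metadata/om.db/000123.sst");
    Path relativePath = metaDirPath.relativize(p);
    // before this patch the entry was flattened to "000123.sst";
    // now it is written as "om.db/000123.sst", which is what
    // InodeMetadataRocksDBCheckpoint resolves on the follower.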
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index 19823796e70..343c46158dc 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -42,10 +42,10 @@
import static org.apache.hadoop.ozone.OzoneConsts.DB_TRANSIENT_MARKER;
import static org.apache.hadoop.ozone.OzoneConsts.DEFAULT_OM_UPDATE_ID;
import static org.apache.hadoop.ozone.OzoneConsts.LAYOUT_VERSION_KEY;
+import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME;
import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
import static org.apache.hadoop.ozone.OzoneConsts.OM_METRICS_FILE;
import static org.apache.hadoop.ozone.OzoneConsts.OM_METRICS_TEMP_FILE;
-import static org.apache.hadoop.ozone.OzoneConsts.OM_SNAPSHOT_DIR;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_RATIS_SNAPSHOT_DIR;
import static org.apache.hadoop.ozone.OzoneConsts.PREPARE_MARKER_KEY;
import static org.apache.hadoop.ozone.OzoneConsts.RPC_PORT;
@@ -150,6 +150,7 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
import javax.management.ObjectName;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
@@ -286,7 +287,6 @@
import org.apache.hadoop.ozone.om.service.DirectoryDeletingService;
import org.apache.hadoop.ozone.om.service.OMRangerBGSyncService;
import org.apache.hadoop.ozone.om.service.QuotaRepairTask;
-import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils;
import org.apache.hadoop.ozone.om.snapshot.defrag.SnapshotDefragService;
import org.apache.hadoop.ozone.om.upgrade.OMLayoutFeature;
import org.apache.hadoop.ozone.om.upgrade.OMLayoutVersionManager;
@@ -4029,31 +4029,39 @@ public synchronized TermIndex installSnapshotFromLeader(String leaderId) {
TermIndex termIndex = null;
try {
- // Install hard links.
- OmSnapshotUtils.createHardLinks(omDBCheckpoint.getCheckpointLocation(), false);
- termIndex = installCheckpoint(leaderId, omDBCheckpoint);
+ Path checkpointLocation = omDBCheckpoint.getCheckpointLocation();
+ if (checkpointLocation == null) {
+ throw new IOException("Cannot install checkpoint from leader " + leaderId + ": checkpointLocation is null");
+ }
+ termIndex = installCheckpoint(leaderId, checkpointLocation);
} catch (Exception ex) {
LOG.error("Failed to install snapshot from Leader OM.", ex);
+ } finally {
+ try {
+ omDBCheckpoint.cleanupCheckpoint();
+ } catch (IOException e) {
+ LOG.error("Failed to cleanup checkpoint at {}", omDBCheckpoint.getCheckpointLocation(), e);
+ }
}
return termIndex;
}
/**
- * Install checkpoint. If the checkpoint snapshot index is greater than
+ * Install checkpoint obtained from leader using the dbCheckpoint endpoint.
+ * The unpacked directory, after installing hard links, contains
+ * both the active OM DB dir and the db.snapshots directory.
+ * If the checkpoint snapshot index is greater than
* OM's last applied transaction index, then re-initialize the OM
* state via this checkpoint. Before re-initializing OM state, the OM Ratis
- * server should be stopped so that no new transactions can be applied.
+ * server should be stopped so that no new transactions can be applied
*/
- TermIndex installCheckpoint(String leaderId, DBCheckpoint omDBCheckpoint)
+ TermIndex installCheckpoint(String leaderId, Path checkpointLocation)
throws Exception {
-
- Path checkpointLocation = omDBCheckpoint.getCheckpointLocation();
+ Path omDbPath = Paths.get(checkpointLocation.toString(), OM_DB_NAME);
TransactionInfo checkpointTrxnInfo = OzoneManagerRatisUtils
- .getTrxnInfoFromCheckpoint(configuration, checkpointLocation);
-
+ .getTrxnInfoFromCheckpoint(configuration, omDbPath);
LOG.info("Installing checkpoint with OMTransactionInfo {}",
checkpointTrxnInfo);
-
return installCheckpoint(leaderId, checkpointLocation, checkpointTrxnInfo);
}
@@ -4061,6 +4069,7 @@ TermIndex installCheckpoint(String leaderId, Path checkpointLocation,
TransactionInfo checkpointTrxnInfo) throws Exception {
long startTime = Time.monotonicNow();
File oldDBLocation = metadataManager.getStore().getDbLocation();
+ Path omDbPath = Paths.get(checkpointLocation.toString(), OM_DB_NAME);
try {
// Stop Background services
keyManager.stop();
@@ -4091,7 +4100,7 @@ TermIndex installCheckpoint(String leaderId, Path checkpointLocation,
// server so that the OM state can be re-initialized. If no then do not
// proceed with installSnapshot.
boolean canProceed = OzoneManagerRatisUtils.verifyTransactionInfo(
- checkpointTrxnInfo, lastAppliedIndex, leaderId, checkpointLocation);
+ checkpointTrxnInfo, lastAppliedIndex, leaderId, omDbPath);
boolean oldOmMetadataManagerStopped = false;
boolean newMetadataManagerStarted = false;
@@ -4102,15 +4111,13 @@ TermIndex installCheckpoint(String leaderId, Path checkpointLocation,
omRpcServer.stop();
isOmRpcServerRunning = false;
omRpcServerStopped = true;
- LOG.info("RPC server is stopped. Spend " +
- (Time.monotonicNow() - time) + " ms.");
+ LOG.info("RPC server is stopped. Spend {} ms.", Time.monotonicNow() - time);
try {
// Stop old metadataManager before replacing DB Dir
time = Time.monotonicNow();
metadataManager.stop();
oldOmMetadataManagerStopped = true;
- LOG.info("metadataManager is stopped. Spend " +
- (Time.monotonicNow() - time) + " ms.");
+ LOG.info("metadataManager is stopped. Spend {} ms.", Time.monotonicNow() - time);
} catch (Exception e) {
String errorMsg = "Failed to stop metadataManager. Cannot proceed " +
"with installing the new checkpoint.";
@@ -4119,8 +4126,7 @@ TermIndex installCheckpoint(String leaderId, Path checkpointLocation,
}
try {
time = Time.monotonicNow();
- dbBackup = replaceOMDBWithCheckpoint(lastAppliedIndex,
- oldDBLocation, checkpointLocation);
+ dbBackup = replaceOMDBWithCheckpoint(lastAppliedIndex, oldDBLocation, checkpointLocation);
term = checkpointTrxnInfo.getTerm();
lastAppliedIndex = checkpointTrxnInfo.getTransactionIndex();
LOG.info("Replaced DB with checkpoint from OM: {}, term: {}, " +
@@ -4175,8 +4181,7 @@ TermIndex installCheckpoint(String leaderId, Path checkpointLocation,
omRpcServer = getRpcServer(configuration);
omRpcServer.start();
isOmRpcServerRunning = true;
- LOG.info("RPC server is re-started. Spend " +
- (Time.monotonicNow() - time) + " ms.");
+ LOG.info("RPC server is re-started. Spend {} ms.", Time.monotonicNow() - time);
} catch (Exception e) {
String errorMsg = "Failed to start RPC Server.";
exitManager.exitSystem(1, errorMsg, e, LOG);
@@ -4225,89 +4230,153 @@ private void stopTrashEmptier() {
}
/**
- * Replace the current OM DB with the new DB checkpoint.
+ * Replaces the OM DB with checkpoint data from leader.
+ * Only backs up and replaces directories/files that are present in checkpointLocation.
*
- * @param lastAppliedIndex the last applied index in the current OM DB.
- * @param checkpointPath path to the new DB checkpoint
- * @return location of backup of the original DB
+ * @param lastAppliedIndex last applied transaction index
+ * @param oldDB current DB directory
+ * @param checkpointLocation checkpoint data directory from leader
+ * @return backup directory containing original state (only changed items)
+ * @throws IOException if operations fail
*/
- File replaceOMDBWithCheckpoint(long lastAppliedIndex, File oldDB,
- Path checkpointPath) throws IOException {
+ File replaceOMDBWithCheckpoint(long lastAppliedIndex, File oldDB, Path checkpointLocation)
+ throws IOException {
- // Take a backup of the current DB
- String dbBackupName = OzoneConsts.OM_DB_BACKUP_PREFIX +
- lastAppliedIndex + "_" + System.currentTimeMillis();
+ // Create single parent backup directory
+ String dbBackupName = OzoneConsts.OM_DB_BACKUP_PREFIX + lastAppliedIndex
+ + "_" + System.currentTimeMillis();
File dbDir = oldDB.getParentFile();
-
- // Backup the active fs and snapshot dirs.
File dbBackupDir = new File(dbDir, dbBackupName);
+
if (!dbBackupDir.mkdirs()) {
- throw new IOException("Failed to make db backup dir: " +
- dbBackupDir);
+ throw new IOException("Failed to create backup directory: " + dbBackupDir);
}
- File dbBackup = new File(dbBackupDir, oldDB.getName());
- File dbSnapshotsDir = new File(dbDir, OM_SNAPSHOT_DIR);
- File dbSnapshotsBackup = new File(dbBackupDir, OM_SNAPSHOT_DIR);
- Files.move(oldDB.toPath(), dbBackup.toPath());
- if (dbSnapshotsDir.exists()) {
- Files.move(dbSnapshotsDir.toPath(),
- dbSnapshotsBackup.toPath());
+
+ // Only backup items that are present in checkpointLocation
+ // Track what we backed up for selective restore
+ Set<String> backedUpItems = new HashSet<>();
+
+ if (Files.exists(checkpointLocation) && Files.isDirectory(checkpointLocation)) {
+ try (Stream<Path> checkpointContents = Files.list(checkpointLocation)) {
+ for (Path checkpointItem : checkpointContents.collect(Collectors.toList())) {
+ Path fileName = checkpointItem.getFileName();
+ if (fileName == null) {
+ LOG.warn("Skipping path with no file name: {}", checkpointItem);
+ continue;
+ }
+ String itemName = fileName.toString();
+ // Skip backup directories, raft logs, and marker files
+ if (itemName.startsWith(OzoneConsts.OM_DB_BACKUP_PREFIX)
+ || itemName.equals(DB_TRANSIENT_MARKER) ||
+ itemName.equals(OZONE_RATIS_SNAPSHOT_DIR) ||
+ itemName.equals(new File(getRatisLogDirectory()).getName())) {
+ continue;
+ }
+
+ // Only backup if this item exists in dbDir
+ Path existingItem = dbDir.toPath().resolve(itemName);
+ if (Files.exists(existingItem)) {
+ Path backupTarget = dbBackupDir.toPath().resolve(itemName);
+ Files.move(existingItem, backupTarget);
+ backedUpItems.add(itemName);
+ }
+ }
+ }
}
- moveCheckpointFiles(oldDB, checkpointPath, dbDir, dbBackup, dbSnapshotsDir,
- dbSnapshotsBackup);
+ // Move checkpoint files (only items present in checkpointLocation)
+ moveCheckpointFiles(oldDB, checkpointLocation, dbDir, dbBackupDir, backedUpItems);
+
return dbBackupDir;
}
- private void moveCheckpointFiles(File oldDB, Path checkpointPath, File dbDir,
- File dbBackup, File dbSnapshotsDir,
- File dbSnapshotsBackup) throws IOException {
- // Move the new DB checkpoint into the om metadata dir
+ /**
+ * Moves contents from checkpointLocation to dbDir, replacing existing files/dirs.
+ * Only moves items that are present in checkpointLocation.
+ * Uses a single parent backup for rollback on failure.
+ *
+ * @param oldDB the old DB directory (will be replaced)
+ * @param checkpointLocation source directory containing checkpoint data
+ * @param dbDir target directory (parent of oldDB)
+ * @param dbBackupDir backup directory containing the original state
+ * @param backedUpItems set of item names that were backed up (for selective restore)
+ * @throws IOException if file operations fail
+ */
+ private void moveCheckpointFiles(File oldDB, Path checkpointLocation, File dbDir,
+ File dbBackupDir, Set<String> backedUpItems) throws IOException {
Path markerFile = new File(dbDir, DB_TRANSIENT_MARKER).toPath();
try {
- // Create a Transient Marker file. This file will be deleted if the
- // checkpoint DB is successfully moved to the old DB location or if the
- // old DB backup is reset to its location. If not, then the OM DB is in
- // an inconsistent state and this marker file will fail OM from
- // starting up.
+ // Create transient marker file
Files.createFile(markerFile);
- // Link each of the candidate DB files to real DB directory. This
- // preserves the links that already exist between files in the
- // candidate db.
- OmSnapshotUtils.linkFiles(checkpointPath.toFile(),
- oldDB);
- moveOmSnapshotData(oldDB.toPath(), dbSnapshotsDir.toPath());
+ // Move everything from checkpointLocation to dbDir, replacing existing
+ if (!Files.exists(checkpointLocation) || !Files.isDirectory(checkpointLocation)) {
+ throw new IOException("Checkpoint data directory does not exist: " + checkpointLocation);
+ }
+ try (Stream<Path> checkpointContents = Files.list(checkpointLocation)) {
+ for (Path sourcePath : checkpointContents.collect(Collectors.toList())) {
+ Path fileName = sourcePath.getFileName();
+ if (fileName == null) {
+ LOG.warn("Skipping path with no file name: {}", sourcePath);
+ continue;
+ }
+ String itemName = fileName.toString();
+ // Skip backup directories, raft logs, and marker files
+ if (itemName.startsWith(OzoneConsts.OM_DB_BACKUP_PREFIX) ||
+ itemName.equals(DB_TRANSIENT_MARKER) ||
+ itemName.equals(OZONE_RATIS_SNAPSHOT_DIR) ||
+ itemName.equals(new File(getRatisLogDirectory()).getName())) {
+ continue;
+ }
+ Path targetPath = dbDir.toPath().resolve(itemName);
+ if (Files.exists(targetPath)) {
+ throw new IOException("Cannot move checkpoint data into target. " +
+ "Checkpoint data already exists: " + targetPath);
+ }
+ // Move source to target
+ Files.move(sourcePath, targetPath);
+ }
+ }
+ // Success - delete marker file
Files.deleteIfExists(markerFile);
} catch (IOException e) {
- LOG.error("Failed to move downloaded DB checkpoint {} to metadata " +
- "directory {}. Exception: {}. Resetting to original DB.",
- checkpointPath, oldDB.toPath(), e);
+ LOG.error("Failed to move checkpoint data from {} to {}. " +
+ "Restoring from backup.", checkpointLocation, dbDir, e);
+ // Rollback: restore only the items that were backed up
try {
- FileUtil.fullyDelete(oldDB);
- Files.move(dbBackup.toPath(), oldDB.toPath());
- if (dbSnapshotsBackup.exists()) {
- Files.move(dbSnapshotsBackup.toPath(), dbSnapshotsDir.toPath());
+ // Delete only the items that were replaced
+ for (String itemName : backedUpItems) {
+ Path targetPath = dbDir.toPath().resolve(itemName);
+ if (Files.exists(targetPath)) {
+ if (Files.isDirectory(targetPath)) {
+ FileUtil.fullyDelete(targetPath.toFile());
+ } else {
+ Files.delete(targetPath);
+ }
+ }
+ }
+ // Restore from backup - only restore items that were backed up
+ if (dbBackupDir.exists() && dbBackupDir.isDirectory()) {
+ File[] backupContents = dbBackupDir.listFiles();
+ if (backupContents != null) {
+ for (File backupItem : backupContents) {
+ String itemName = backupItem.getName();
+ if (backedUpItems.contains(itemName)) {
+ Path targetPath = dbDir.toPath().resolve(itemName);
+ Files.move(backupItem.toPath(), targetPath);
+ }
+ }
+ }
}
Files.deleteIfExists(markerFile);
} catch (IOException ex) {
- String errorMsg = "Failed to reset to original DB. OM is in an " +
- "inconsistent state.";
+ String errorMsg = "Failed to restore from backup. OM is in an inconsistent state.";
exitManager.exitSystem(1, errorMsg, ex, LOG);
}
throw e;
}
}
- // Move the new snapshot directory into place.
- private void moveOmSnapshotData(Path dbPath, Path dbSnapshotsDir)
- throws IOException {
- Path incomingSnapshotsDir = Paths.get(dbPath.toString(),
- OM_SNAPSHOT_DIR);
- if (incomingSnapshotsDir.toFile().exists()) {
- Files.move(incomingSnapshotsDir, dbSnapshotsDir);
- }
- }
-
/**
* Re-instantiate MetadataManager with new DB checkpoint.
* All the classes which use/ store MetadataManager should also be updated
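For orientation, the checkpoint directory handed to installCheckpoint is now expected to look roughly like this (directory names taken from the OzoneConsts constants used above; the snapshot id is illustrative):

    <checkpointLocation>/
        om.db/                      <- active DB; transaction info is read here
        db.snapshots/
            checkpointState/
                om.db-<snapshotId>/ <- snapshot DBs, moved into place as-is

replaceOMDBWithCheckpoint then backs up only the top-level entries that also exist in the checkpoint, moves the new contents in, and on failure restores exactly the backed-up items.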
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis_snapshot/OmRatisSnapshotProvider.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis_snapshot/OmRatisSnapshotProvider.java
index ef0a46548a4..8d6baf02022 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis_snapshot/OmRatisSnapshotProvider.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis_snapshot/OmRatisSnapshotProvider.java
@@ -36,6 +36,7 @@
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@@ -46,6 +47,8 @@
import org.apache.hadoop.hdds.utils.HAUtils;
import org.apache.hadoop.hdds.utils.LegacyHadoopConfigurationSource;
import org.apache.hadoop.hdds.utils.RDBSnapshotProvider;
+import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
+import org.apache.hadoop.hdds.utils.db.InodeMetadataRocksDBCheckpoint;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.ozone.om.helpers.OMNodeDetails;
import org.apache.hadoop.security.SecurityUtil;
@@ -207,6 +210,11 @@ public static void downloadFileWithProgress(InputStream inputStream, File target
}
}
+ @Override
+ public DBCheckpoint getCheckpointFromUntarredDb(Path untarredDbDir) throws IOException {
+ return new InodeMetadataRocksDBCheckpoint(untarredDbDir);
+ }
+
/**
* Writes form data to output stream as any HTTP client would for a
* multipart/form-data request.
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java
index 728d4a8e9ce..ceb1d143ea2 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/OmSnapshotUtils.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.ozone.om.snapshot;
import static org.apache.hadoop.hdds.utils.IOUtils.getINode;
+import static org.apache.hadoop.ozone.OzoneConsts.HARDLINK_SEPARATOR;
import static org.apache.hadoop.ozone.OzoneConsts.OM_CHECKPOINT_DIR;
import java.io.File;
@@ -27,14 +28,10 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.FileTime;
-import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
-import org.apache.hadoop.ozone.om.OmSnapshotManager;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
/**
* Ozone Manager Snapshot Utilities.
@@ -43,8 +40,7 @@ public final class OmSnapshotUtils {
public static final String DATA_PREFIX = "data";
public static final String DATA_SUFFIX = "txt";
- private static final Logger LOG =
- LoggerFactory.getLogger(OmSnapshotUtils.class);
+ public static final String PATH_SEPARATOR = "/";
private OmSnapshotUtils() { }
@@ -106,66 +102,13 @@ public static Path createHardLinkList(int truncateLength,
fixedFile = f.toString();
}
}
- sb.append(truncateFileName(truncateLength, entry.getKey())).append('\t')
+ sb.append(truncateFileName(truncateLength, entry.getKey())).append(HARDLINK_SEPARATOR)
.append(fixedFile).append('\n');
}
Files.write(data, sb.toString().getBytes(StandardCharsets.UTF_8));
return data;
}
- /**
- * Create hard links listed in OM_HARDLINK_FILE.
- *
- * @param dbPath Path to db to have links created.
- * @param deleteSourceFiles - Whether to delete the source files after creating the links.
- */
- public static void createHardLinks(Path dbPath, boolean deleteSourceFiles) throws IOException {
- File hardLinkFile =
- new File(dbPath.toString(), OmSnapshotManager.OM_HARDLINK_FILE);
- List<Path> filesToDelete = new ArrayList<>();
- if (hardLinkFile.exists()) {
- // Read file.
- try (Stream<String> s = Files.lines(hardLinkFile.toPath())) {
- List<String> lines = s.collect(Collectors.toList());
-
- // Create a link for each line.
- for (String l : lines) {
- String[] parts = l.split("\t");
- if (parts.length != 2) {
- LOG.warn("Skipping malformed line in hardlink file: {}", l);
- continue;
- }
- String from = parts[1];
- String to = parts[0];
- Path fullFromPath = Paths.get(dbPath.toString(), from);
- filesToDelete.add(fullFromPath);
- Path fullToPath = Paths.get(dbPath.toString(), to);
- // Make parent dir if it doesn't exist.
- Path parent = fullToPath.getParent();
- if ((parent != null) && (!parent.toFile().exists())) {
- if (!parent.toFile().mkdirs()) {
- throw new IOException(
- "Failed to create directory: " + parent.toString());
- }
- }
- Files.createLink(fullToPath, fullFromPath);
- }
- if (!hardLinkFile.delete()) {
- throw new IOException("Failed to delete: " + hardLinkFile);
- }
- }
- }
- if (deleteSourceFiles) {
- for (Path fileToDelete : filesToDelete) {
- try {
- Files.deleteIfExists(fileToDelete);
- } catch (IOException e) {
- LOG.warn("Couldn't delete source file {} while unpacking the DB", fileToDelete, e);
- }
- }
- }
- }
-
/**
* Link each of the files in oldDir to newDir.
*
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java
index 3750c430c14..dd7195802ed 100644
--- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/TestOmSnapshotManager.java
@@ -65,6 +65,7 @@
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.HddsWhiteboxTestUtils;
import org.apache.hadoop.hdds.utils.db.DBStore;
+import org.apache.hadoop.hdds.utils.db.InodeMetadataRocksDBCheckpoint;
import org.apache.hadoop.hdds.utils.db.RDBBatchOperation;
import org.apache.hadoop.hdds.utils.db.RDBStore;
import org.apache.hadoop.hdds.utils.db.Table;
@@ -378,20 +379,26 @@ public void testHardLinkCreation() throws IOException {
Files.move(hardLinkList, Paths.get(candidateDir.toString(),
OM_HARDLINK_FILE));
- // Pointers to follower links to be created.
- File f1FileLink = new File(followerSnapDir2, "f1.sst");
- File s1FileLink = new File(followerSnapDir2, "s1.sst");
+ Path snapshot2Path = Paths.get(candidateDir.getPath(),
+ OM_SNAPSHOT_CHECKPOINT_DIR, followerSnapDir2.getName());
+ // Pointers to follower links to be created.
+ File f1FileLink = new File(snapshot2Path.toFile(), "f1.sst");
+ File s1FileLink = new File(snapshot2Path.toFile(), "s1.sst");
+ Object s1FileInode = getINode(s1File.toPath());
+ Object f1FileInode = getINode(f1File.toPath());
// Create links on the follower from list.
- OmSnapshotUtils.createHardLinks(candidateDir.toPath(), false);
+ InodeMetadataRocksDBCheckpoint obtainedCheckpoint =
+ new InodeMetadataRocksDBCheckpoint(candidateDir.toPath());
+ assertNotNull(obtainedCheckpoint);
// Confirm expected follower links.
assertTrue(s1FileLink.exists());
- assertEquals(getINode(s1File.toPath()),
+ assertEquals(s1FileInode,
getINode(s1FileLink.toPath()), "link matches original file");
assertTrue(f1FileLink.exists());
- assertEquals(getINode(f1File.toPath()),
+ assertEquals(f1FileInode,
getINode(f1FileLink.toPath()), "link matches original file");
}
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/defrag/TestInodeMetadataRocksDBCheckpoint.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/defrag/TestInodeMetadataRocksDBCheckpoint.java
new file mode 100644
index 00000000000..9a8816f9e46
--- /dev/null
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/defrag/TestInodeMetadataRocksDBCheckpoint.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.om.snapshot.defrag;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.hadoop.hdds.utils.IOUtils.getINode;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.nio.file.Files;
+import org.apache.hadoop.hdds.utils.db.InodeMetadataRocksDBCheckpoint;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Class to test InodeMetadataRocksDBCheckpoint.
+ */
+public class TestInodeMetadataRocksDBCheckpoint {
+
+ /**
+ * Test hard link installation performed by the InodeMetadataRocksDBCheckpoint constructor.
+ */
+ @Test
+ public void testCreateHardLinksWithOmDbPrefix(@TempDir File tempDir) throws Exception {
+ // Create a test dir inside temp dir
+ File testDir = new File(tempDir, "testDir");
+ assertTrue(testDir.mkdir(), "Failed to create test dir");
+ // Create source file
+ File sourceFile = new File(testDir, "source.sst");
+ Files.write(sourceFile.toPath(), "test content".getBytes(UTF_8));
+
+ // Create hardlink file with "om.db/" prefixed paths
+ File hardlinkFile = new File(testDir, "hardLinkFile"); // OmSnapshotManager.OM_HARDLINK_FILE
+ String hardlinkContent =
+ "om.db/target1.sst\tsource.sst\n" +
+ "target2.sst\tsource.sst\n";
+ Files.write(hardlinkFile.toPath(), hardlinkContent.getBytes(UTF_8));
+ Object sourceFileInode = getINode(sourceFile.toPath());
+ // Execute hard link installation via the checkpoint constructor
+ InodeMetadataRocksDBCheckpoint obtainedCheckpoint =
+ new InodeMetadataRocksDBCheckpoint(testDir.toPath());
+ assertNotNull(obtainedCheckpoint);
+
+ // Verify hard links created correctly
+ File target1 = new File(testDir, "om.db/target1.sst");
+ File target2 = new File(testDir, "target2.sst");
+
+ assertTrue(target1.exists(),
+ "Hard link should be created");
+ assertTrue(target2.exists(),
+ "Hard link should be created");
+
+ // Verify content is same using inode comparison
+ assertEquals(sourceFileInode, getINode(target1.toPath()),
+ "Hard links should have same inode as source");
+ assertEquals(sourceFileInode, getINode(target2.toPath()),
+ "Hard links should have same inode as source");
+ }
+}
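If needed, the new test can be run in isolation with standard Maven surefire filtering, along these lines (module path as in the diffstat above):

    mvn -pl hadoop-ozone/ozone-manager test -Dtest=TestInodeMetadataRocksDBCheckpoint

Since the test exercises Files.createLink, it assumes the JUnit @TempDir sits on a filesystem that supports hard links.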
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
index 939a3b16df8..24fabcf3491 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.ozone.recon.spi.impl;
import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_DB_DIRS_PERMISSIONS_DEFAULT;
-import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
+import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_AUTH_TYPE;
import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OM_SNAPSHOT_DB;
@@ -84,7 +84,6 @@
import org.apache.hadoop.ozone.om.OMMetadataManager;
import org.apache.hadoop.ozone.om.helpers.DBUpdates;
import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
-import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.DBUpdatesRequest;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.ServicePort.Type;
import org.apache.hadoop.ozone.recon.ReconContext;
@@ -197,11 +196,11 @@ public OzoneManagerServiceProviderImpl(
HttpConfig.Policy policy = HttpConfig.getHttpPolicy(configuration);
omDBSnapshotUrl = "http://" + ozoneManagerHttpAddress +
- OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
+ OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
if (policy.isHttpsEnabled()) {
omDBSnapshotUrl = "https://" + ozoneManagerHttpsAddress +
- OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
+ OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
}
boolean flushParam = configuration.getBoolean(
@@ -426,7 +425,7 @@ public String getOzoneManagerSnapshotUrl() throws IOException {
omLeaderUrl = (policy.isHttpsEnabled() ?
"https://" + info.getServiceAddress(Type.HTTPS) :
"http://" + info.getServiceAddress(Type.HTTP)) +
- OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
+ OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
}
}
}
@@ -483,7 +482,6 @@ public DBCheckpoint getOzoneManagerDBSnapshot() {
try (InputStream inputStream = reconUtils.makeHttpCall(
connectionFactory, getOzoneManagerSnapshotUrl(),
isOmSpnegoEnabled()).getInputStream()) {
tarExtractor.extractTar(inputStream, untarredDbDir);
- OmSnapshotUtils.createHardLinks(untarredDbDir, true);
} catch (IOException | InterruptedException e) {
reconContext.updateHealthStatus(new AtomicBoolean(false));
reconContext.updateErrors(ReconContext.ErrorCode.GET_OM_DB_SNAPSHOT_FAILED);
@@ -491,7 +489,6 @@ connectionFactory, getOzoneManagerSnapshotUrl(), isOmSpnegoEnabled()).getInputSt
}
return null;
});
-
// Validate extracted files
File[] sstFiles = untarredDbDir.toFile().listFiles((dir, name) -> name.endsWith(".sst"));
if (sstFiles != null && sstFiles.length > 0) {
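Worth noting: with the switch back from the V2 endpoint, the tarball Recon downloads is a plain checkpoint again, which is presumably why the createHardLinks post-processing step above could be dropped.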