This is an automated email from the ASF dual-hosted git repository.

swamirishi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d6f3a5a769 HDDS-13772. Snapshot paths to be re-read from OM checkpoint DB inside lock again. (#9131)
4d6f3a5a769 is described below

commit 4d6f3a5a769cca01b98b69a89f80f1da6e6eced0
Author: Sadanand Shenoy <[email protected]>
AuthorDate: Sun Nov 2 19:49:07 2025 +0530

    HDDS-13772. Snapshot paths to be re-read from OM checkpoint DB inside lock again. (#9131)
    
    Co-authored-by: Sadanand Shenoy <[email protected]>
---
 .../TestOMDbCheckpointServletInodeBasedXfer.java   | 103 ++++++++++++++++++++-
 .../om/OMDBCheckpointServletInodeBasedXfer.java    |  32 ++++---
 2 files changed, 123 insertions(+), 12 deletions(-)
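
For readers skimming the diff below: the core of the change is that, once the checkpoint has been created (inside the lock, per the commit title), the set of snapshot DB paths is re-read from the checkpoint's own snapshotInfoTable instead of reusing the set collected earlier from the live OM DB. The following is a condensed, non-authoritative sketch of that idea; it reuses only calls that appear in the diff (OmMetadataManagerImpl.createCheckpointMetadataManager, getSnapshotInfoTable, OmSnapshotManager.getSnapshotPath), while the class and method names FrozenSnapshotPathsSketch and snapshotPathsForTarball are invented for illustration, and sizing, archiving and error handling are omitted.

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.hdds.utils.db.TableIterator;
import org.apache.hadoop.ozone.om.OMMetadataManager;
import org.apache.hadoop.ozone.om.OmMetadataManagerImpl;
import org.apache.hadoop.ozone.om.OmSnapshotManager;
import org.apache.hadoop.ozone.om.OzoneManager;
import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;

// Condensed sketch only; the real logic lives in OMDBCheckpointServletInodeBasedXfer.
final class FrozenSnapshotPathsSketch {

  private FrozenSnapshotPathsSketch() {
  }

  // After the checkpoint exists, re-read the snapshot set from the checkpoint DB
  // so it matches the frozen state rather than whatever the live OM DB holds by now.
  static Set<Path> snapshotPathsForTarball(OzoneManager om, DBCheckpoint checkpoint)
      throws IOException {
    try (OmMetadataManagerImpl checkpointMetadataManager = OmMetadataManagerImpl
        .createCheckpointMetadataManager(om.getConfiguration(), checkpoint)) {
      return getSnapshotDirsFromDB(checkpointMetadataManager, om);
    }
  }

  // Same lookup as the new getSnapshotDirsFromDB() in the patch: iterate the
  // snapshotInfoTable of whichever DB backs the supplied metadata manager.
  static Set<Path> getSnapshotDirsFromDB(OMMetadataManager omMetadataManager, OzoneManager om)
      throws IOException {
    Set<Path> snapshotPaths = new HashSet<>();
    try (TableIterator<String, ? extends Table.KeyValue<String, SnapshotInfo>> iter =
        omMetadataManager.getSnapshotInfoTable().iterator()) {
      while (iter.hasNext()) {
        SnapshotInfo snapshotInfo = iter.next().getValue();
        // Resolve the on-disk snapshot DB directory for this snapshot.
        String snapshotDir = OmSnapshotManager.getSnapshotPath(om.getConfiguration(),
            snapshotInfo.getCheckpointDirName());
        snapshotPaths.add(Paths.get(snapshotDir));
      }
    }
    return snapshotPaths;
  }
}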

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java
index 0f5c8bae4b4..f2b94182c80 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServletInodeBasedXfer.java
@@ -21,6 +21,7 @@
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ADMINISTRATORS_WILDCARD;
+import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL;
 import static org.apache.hadoop.ozone.OzoneConsts.OM_CHECKPOINT_DIR;
 import static org.apache.hadoop.ozone.OzoneConsts.OM_DB_NAME;
 import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
@@ -35,6 +36,8 @@
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
 import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.anyMap;
+import static org.mockito.ArgumentMatchers.anySet;
 import static org.mockito.Mockito.any;
 import static org.mockito.Mockito.anyBoolean;
 import static org.mockito.Mockito.doCallRealMethod;
@@ -146,6 +149,7 @@ void init() throws Exception {
     // ensure cache entries are not evicted thereby snapshot db's are not closed
     conf.setTimeDuration(OMConfigKeys.OZONE_OM_SNAPSHOT_CACHE_CLEANUP_SERVICE_RUN_INTERVAL,
         100, TimeUnit.MINUTES);
+    conf.setTimeDuration(OZONE_SNAPSHOT_DELETING_SERVICE_INTERVAL, 100, TimeUnit.MILLISECONDS);
   }
 
   @AfterEach
@@ -228,12 +232,15 @@ public void write(int b) throws IOException {
         .thenReturn(lock);
     doCallRealMethod().when(omDbCheckpointServletMock).getCheckpoint(any(), anyBoolean());
     assertNull(doCallRealMethod().when(omDbCheckpointServletMock).getBootstrapTempData());
-    doCallRealMethod().when(omDbCheckpointServletMock).getSnapshotDirs(any());
     doCallRealMethod().when(omDbCheckpointServletMock).
         processMetadataSnapshotRequest(any(), any(), anyBoolean(), anyBoolean());
     doCallRealMethod().when(omDbCheckpointServletMock).writeDbDataToStream(any(), any(), any(), any());
     doCallRealMethod().when(omDbCheckpointServletMock).getCompactionLogDir();
     doCallRealMethod().when(omDbCheckpointServletMock).getSstBackupDir();
+    doCallRealMethod().when(omDbCheckpointServletMock)
+        .transferSnapshotData(anySet(), any(), anySet(), any(), any(), anyMap());
+    doCallRealMethod().when(omDbCheckpointServletMock).createAndPrepareCheckpoint(anyBoolean());
+    doCallRealMethod().when(omDbCheckpointServletMock).getSnapshotDirsFromDB(any());
   }
 
   @ParameterizedTest
@@ -586,6 +593,99 @@ public void testBootstrapLockBlocksMultipleServices() throws Exception {
     assertTrue(servicesSucceeded.get() > 0, "Services should have succeeded after lock release");
   }
 
+  /**
+   * Tests the full checkpoint servlet flow to ensure snapshot paths are read
+   * from checkpoint metadata (frozen state) rather than live OM metadata (current state).
+   * Scenario:
+   * 1. Create snapshot S1.
+   * 2. Create snapshot S2 later, just before the checkpoint is taken.
+   * 3. The servlet processes the checkpoint - it should include both S1 and S2 data,
+   *    since the checkpoint's snapshotInfoTable contains S1 and S2.
+   */
+  @Test
+  public void testCheckpointIncludesSnapshotsFromFrozenState() throws Exception {
+    String volumeName = "vol" + RandomStringUtils.secure().nextNumeric(5);
+    String bucketName = "buck" + RandomStringUtils.secure().nextNumeric(5);
+
+    setupCluster();
+    om.getKeyManager().getSnapshotSstFilteringService().pause();
+
+    // Create test data and snapshots
+    OzoneBucket bucket = TestDataUtil.createVolumeAndBucket(client, volumeName, bucketName);
+
+    // Create key before first snapshot
+    TestDataUtil.createKey(bucket, "key1",
+        ReplicationConfig.fromTypeAndFactor(ReplicationType.RATIS, ReplicationFactor.ONE),
+        "data1".getBytes(StandardCharsets.UTF_8));
+    client.getObjectStore().createSnapshot(volumeName, bucketName, "snapshot1");
+    // At this point, the live OM has snapshot S1
+    List<OzoneSnapshot> snapshots = new ArrayList<>();
+    client.getObjectStore().listSnapshot(volumeName, bucketName, "", null)
+        .forEachRemaining(snapshots::add);
+    assertEquals(1, snapshots.size(), "Should have 1 snapshot initially");
+    OzoneSnapshot snapshot1 = snapshots.stream()
+        .filter(snap -> snap.getName().equals("snapshot1"))
+        .findFirst()
+        .orElseThrow(() -> new RuntimeException("snapshot1 not found"));
+
+    // Setup servlet mocks for checkpoint processing
+    setupMocks();
+    when(requestMock.getParameter(OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA)).thenReturn("true");
+
+    // Set up a spy DBStore so checkpoint creation can be intercepted below
+    DBStore dbStore = om.getMetadataManager().getStore();
+    DBStore spyDbStore = spy(dbStore);
+    AtomicReference<DBCheckpoint> capturedCheckpoint = new AtomicReference<>();
+
+    when(spyDbStore.getCheckpoint(true)).thenAnswer(invocation -> {
+      // Create snapshot2 just before the checkpoint is taken
+      client.getObjectStore().createSnapshot(volumeName, bucketName, "snapshot2");
+      // Also wait for the double buffer to flush to ensure all transactions are committed
+      om.awaitDoubleBufferFlush();
+      DBCheckpoint checkpoint = spy(dbStore.getCheckpoint(true));
+      doNothing().when(checkpoint).cleanupCheckpoint(); // Don't clean up, so contents can be verified
+      capturedCheckpoint.set(checkpoint);
+      return checkpoint;
+    });
+
+    // Initialize servlet
+    doCallRealMethod().when(omDbCheckpointServletMock).initialize(any(), any(),
+        eq(false), any(), any(), eq(false));
+    omDbCheckpointServletMock.initialize(spyDbStore, om.getMetrics().getDBCheckpointMetrics(),
+        false, om.getOmAdminUsernames(), om.getOmAdminGroups(), false);
+    when(responseMock.getOutputStream()).thenReturn(servletOutputStream);
+    // Process checkpoint servlet
+    omDbCheckpointServletMock.doGet(requestMock, responseMock);
+    snapshots.clear();
+    client.getObjectStore().listSnapshot(volumeName, bucketName, "", null)
+        .forEachRemaining(snapshots::add);
+    assertEquals(2, snapshots.size(), "Should have 2 snapshots");
+    OzoneSnapshot snapshot2 = snapshots.stream()
+        .filter(snap -> snap.getName().equals("snapshot2"))
+        .findFirst()
+        .orElseThrow(() -> new RuntimeException("snapshot2 not found"));
+    // Extract tarball and verify contents
+    String testDirName = folder.resolve("testDir").toString();
+    String newDbDirName = testDirName + OM_KEY_PREFIX + OM_DB_NAME;
+    File newDbDir = new File(newDbDirName);
+    assertTrue(newDbDir.mkdirs());
+    FileUtil.unTar(tempFile, newDbDir);
+    OmSnapshotUtils.createHardLinks(newDbDir.toPath(), true);
+    Path snapshot1DbDir = Paths.get(newDbDir.toPath().toString(), OM_SNAPSHOT_CHECKPOINT_DIR,
+        OM_DB_NAME + "-" + snapshot1.getSnapshotId());
+    Path snapshot2DbDir = Paths.get(newDbDir.toPath().toString(), OM_SNAPSHOT_CHECKPOINT_DIR,
+        OM_DB_NAME + "-" + snapshot2.getSnapshotId());
+    boolean snapshot1IncludedInCheckpoint = Files.exists(snapshot1DbDir);
+    boolean snapshot2IncludedInCheckpoint = Files.exists(snapshot2DbDir);
+    assertTrue(snapshot1IncludedInCheckpoint && snapshot2IncludedInCheckpoint,
+        "Checkpoint should include both snapshot1 and snapshot2 data");
+    // Cleanup
+    if (capturedCheckpoint.get() != null) {
+      capturedCheckpoint.get().cleanupCheckpoint();
+    }
+  }
+
   private static void deleteWalFiles(Path snapshotDbDir) throws IOException {
     try (Stream<Path> filesInTarball = Files.list(snapshotDbDir)) {
       List<Path> files = filesInTarball.filter(p -> p.toString().contains(".log"))
@@ -648,6 +748,7 @@ private void setupClusterAndMocks(String volumeName, String bucketName,
     // Init the mock with the spyDbstore
     doCallRealMethod().when(omDbCheckpointServletMock).initialize(any(), any(),
         eq(false), any(), any(), eq(false));
+    doCallRealMethod().when(omDbCheckpointServletMock).getSnapshotDirsFromDB(any());
     omDbCheckpointServletMock.initialize(spyDbStore, om.getMetrics().getDBCheckpointMetrics(),
         false,
         om.getOmAdminUsernames(), om.getOmAdminGroups(), false);
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
index 27e7f1c2d6d..0c120ba080d 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
@@ -67,6 +67,7 @@
 import org.apache.hadoop.hdds.utils.DBCheckpointServlet;
 import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
 import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.hdds.utils.db.TableIterator;
 import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.lock.BootstrapStateHandler;
 import org.apache.hadoop.ozone.om.helpers.SnapshotInfo;
@@ -219,7 +220,7 @@ public void writeDbDataToStream(HttpServletRequest request, OutputStream destina
     if (!includeSnapshotData) {
       maxTotalSstSize.set(Long.MAX_VALUE);
     } else {
-      snapshotPaths = getSnapshotDirs(omMetadataManager);
+      snapshotPaths = getSnapshotDirsFromDB(omMetadataManager);
     }
 
     if (sstFilesToExclude.isEmpty()) {
@@ -263,6 +264,11 @@ public void writeDbDataToStream(HttpServletRequest request, OutputStream destina
         writeDBToArchive(sstFilesToExclude, checkpointDir,
             maxTotalSstSize, archiveOutputStream, tmpdir, hardLinkFileMap, false);
         if (includeSnapshotData) {
+          // get the list of snapshots from the checkpoint
+          try (OmMetadataManagerImpl checkpointMetadataManager = OmMetadataManagerImpl
+              .createCheckpointMetadataManager(om.getConfiguration(), checkpoint)) {
+            snapshotPaths = getSnapshotDirsFromDB(checkpointMetadataManager);
+          }
           writeDBToArchive(sstFilesToExclude, getCompactionLogDir(), maxTotalSstSize, archiveOutputStream, tmpdir,
               hardLinkFileMap, false);
           writeDBToArchive(sstFilesToExclude, sstBackupFiles.stream(),
@@ -295,7 +301,7 @@ public void writeDbDataToStream(HttpServletRequest request, OutputStream destina
   * @param hardLinkFileMap     Map of hardlink file paths to their unique identifiers for deduplication.
   * @throws IOException if an I/O error occurs during processing.
   */
-  private void transferSnapshotData(Set<String> sstFilesToExclude, Path tmpdir, Set<Path> snapshotPaths,
+  void transferSnapshotData(Set<String> sstFilesToExclude, Path tmpdir, Set<Path> snapshotPaths,
       AtomicLong maxTotalSstSize, ArchiveOutputStream<TarArchiveEntry> archiveOutputStream,
       Map<String, String> hardLinkFileMap) throws IOException {
     OzoneManager om = (OzoneManager) getServletContext().getAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE);
@@ -380,20 +386,24 @@ private OzoneConfiguration getConf() {
   }
 
   /**
-   * Collects paths to all snapshot databases.
+   * Collects paths to all snapshot databases from the OM DB.
    *
    * @param omMetadataManager OMMetadataManager instance
    * @return Set of paths to snapshot databases
    * @throws IOException if an I/O error occurs
    */
-  Set<Path> getSnapshotDirs(OMMetadataManager omMetadataManager) throws IOException {
+  Set<Path> getSnapshotDirsFromDB(OMMetadataManager omMetadataManager) throws IOException {
     Set<Path> snapshotPaths = new HashSet<>();
-    SnapshotChainManager snapshotChainManager = new SnapshotChainManager(omMetadataManager);
-    for (SnapshotChainInfo snapInfo : snapshotChainManager.getGlobalSnapshotChain().values()) {
-      String snapshotDir =
-          OmSnapshotManager.getSnapshotPath(getConf(), SnapshotInfo.getCheckpointDirName(snapInfo.getSnapshotId()));
-      Path path = Paths.get(snapshotDir);
-      snapshotPaths.add(path);
+    try (TableIterator<String, ? extends Table.KeyValue<String, SnapshotInfo>> iter =
+        omMetadataManager.getSnapshotInfoTable().iterator()) {
+        omMetadataManager.getSnapshotInfoTable().iterator()) {
+      while (iter.hasNext()) {
+        Table.KeyValue<String, SnapshotInfo> kv = iter.next();
+        SnapshotInfo snapshotInfo = kv.getValue();
+        String snapshotDir = OmSnapshotManager.getSnapshotPath(getConf(),
+            snapshotInfo.getCheckpointDirName());
+        Path path = Paths.get(snapshotDir);
+        snapshotPaths.add(path);
+      }
     }
     return snapshotPaths;
   }
@@ -482,7 +492,7 @@ private boolean writeDBToArchive(Set<String> sstFilesToExclude, Stream<Path> fil
   * @param flush  If true, flushes in-memory data to disk before checkpointing.
   * @throws IOException If an error occurs during checkpoint creation or file copying.
   */
-  private DBCheckpoint createAndPrepareCheckpoint(boolean flush) throws IOException {
+  DBCheckpoint createAndPrepareCheckpoint(boolean flush) throws IOException {
     // Create & return the checkpoint.
     return getDbStore().getCheckpoint(flush);
   }
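
A side note on the test strategy, since it can be easy to miss in the diff above: the new integration test stubs the spied DBStore#getCheckpoint(true) with a Mockito Answer that first creates "snapshot2" and then delegates to the real store, which is what makes the frozen-state read observable in the extracted tarball. Below is a stripped-down illustration of that interception pattern; the helper class and method names (CheckpointSpySketch, spyWithPreCheckpointAction) are invented for the sketch, and everything else mirrors the test code above.

import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.hadoop.hdds.utils.db.DBCheckpoint;
import org.apache.hadoop.hdds.utils.db.DBStore;

// Hypothetical helper showing the interception pattern used by the new test.
final class CheckpointSpySketch {

  private CheckpointSpySketch() {
  }

  // Returns a spy whose getCheckpoint(true) runs the given action (e.g. creating
  // an extra snapshot) before delegating to the real store, and captures the
  // resulting checkpoint so the test can inspect and clean it up afterwards.
  static DBStore spyWithPreCheckpointAction(DBStore realStore, Runnable preCheckpointAction,
      AtomicReference<DBCheckpoint> capturedCheckpoint) throws IOException {
    DBStore spyStore = spy(realStore);
    // Note: with a spy, when(..) invokes the real method once during stubbing,
    // exactly as in the test above.
    when(spyStore.getCheckpoint(true)).thenAnswer(invocation -> {
      preCheckpointAction.run();
      DBCheckpoint checkpoint = realStore.getCheckpoint(true);
      capturedCheckpoint.set(checkpoint);
      return checkpoint;
    });
    return spyStore;
  }
}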


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
