This is an automated email from the ASF dual-hosted git repository. weichiu pushed a commit to branch ozone-2.1 in repository https://gitbox.apache.org/repos/asf/ozone.git
commit 163ddfae2d0f6f62072c9c55133da326b54c96f9 Author: Swaminathan Balachandran <[email protected]> AuthorDate: Wed Oct 29 07:40:23 2025 -0400 HDDS-13004. Snapshot Cache lock on a specific snapshotId (#9210) (cherry picked from commit 2806bae18125a27a732f797eb957f0c103e89136) --- .../hadoop/ozone/om/snapshot/SnapshotCache.java | 90 +++++++++++++--------- .../ozone/om/snapshot/TestSnapshotCache.java | 14 +++- 2 files changed, 67 insertions(+), 37 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index 81c9dc46554..ff0b04b0541 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -287,14 +287,26 @@ public void release(UUID key) { */ public UncheckedAutoCloseableSupplier<OMLockDetails> lock() { return lock(() -> lock.acquireResourceWriteLock(SNAPSHOT_DB_LOCK), - () -> lock.releaseResourceWriteLock(SNAPSHOT_DB_LOCK)); + () -> lock.releaseResourceWriteLock(SNAPSHOT_DB_LOCK), () -> cleanup(true)); } - private UncheckedAutoCloseableSupplier<OMLockDetails> lock( - Supplier<OMLockDetails> lockFunction, Supplier<OMLockDetails> unlockFunction) { + /** + * Acquires a write lock on a specific snapshot database and returns an auto-closeable supplier for lock details. + * The lock ensures that the operations accessing the snapshot database are performed in a thread safe manner. The + * returned supplier automatically releases the lock acquired when closed, preventing potential resource + * contention or deadlocks. + */ + public UncheckedAutoCloseableSupplier<OMLockDetails> lock(UUID snapshotId) { + return lock(() -> lock.acquireWriteLock(SNAPSHOT_DB_LOCK, snapshotId.toString()), + () -> lock.releaseWriteLock(SNAPSHOT_DB_LOCK, snapshotId.toString()), + () -> cleanup(snapshotId)); + } + + private UncheckedAutoCloseableSupplier<OMLockDetails> lock(Supplier<OMLockDetails> lockFunction, + Supplier<OMLockDetails> unlockFunction, Supplier<Void> cleanupFunction) { AtomicReference<OMLockDetails> lockDetails = new AtomicReference<>(lockFunction.get()); if (lockDetails.get().isLockAcquired()) { - cleanup(true); + cleanupFunction.get(); if (!dbMap.isEmpty()) { lockDetails.set(unlockFunction.get()); } @@ -323,43 +335,49 @@ public OMLockDetails get() { * If cache size exceeds soft limit, attempt to clean up and close the instances that has zero reference count. */ - private synchronized void cleanup(boolean force) { + private synchronized Void cleanup(boolean force) { if (force || dbMap.size() > cacheSizeLimit) { for (UUID evictionKey : pendingEvictionQueue) { - ReferenceCounted<OmSnapshot> snapshot = dbMap.get(evictionKey); - if (snapshot != null && snapshot.getTotalRefCount() == 0) { - try { - compactSnapshotDB(snapshot.get()); - } catch (IOException e) { - LOG.warn("Failed to compact snapshot DB for snapshotId {}: {}", - evictionKey, e.getMessage()); - } - } - - dbMap.compute(evictionKey, (k, v) -> { - pendingEvictionQueue.remove(k); - if (v == null) { - throw new IllegalStateException("SnapshotId '" + k + "' does not exist in cache. The RocksDB " + - "instance of the Snapshot may not be closed properly."); - } + cleanup(evictionKey); + } + } + return null; + } - if (v.getTotalRefCount() > 0) { - LOG.debug("SnapshotId {} is still being referenced ({}), skipping its clean up.", k, v.getTotalRefCount()); - return v; - } else { - LOG.debug("Closing SnapshotId {}. It is not being referenced anymore.", k); - // Close the instance, which also closes its DB handle. - try { - v.get().close(); - } catch (IOException ex) { - throw new IllegalStateException("Error while closing snapshot DB.", ex); - } - omMetrics.decNumSnapshotCacheSize(); - return null; - } - }); + private synchronized Void cleanup(UUID evictionKey) { + ReferenceCounted<OmSnapshot> snapshot = dbMap.get(evictionKey); + if (snapshot != null && snapshot.getTotalRefCount() == 0) { + try { + compactSnapshotDB(snapshot.get()); + } catch (IOException e) { + LOG.warn("Failed to compact snapshot DB for snapshotId {}: {}", + evictionKey, e.getMessage()); } } + + dbMap.compute(evictionKey, (k, v) -> { + pendingEvictionQueue.remove(k); + if (v == null) { + throw new IllegalStateException("SnapshotId '" + k + "' does not exist in cache. The RocksDB " + + "instance of the Snapshot may not be closed properly."); + } + + if (v.getTotalRefCount() > 0) { + LOG.debug("SnapshotId {} is still being referenced ({}), skipping its clean up.", k, v.getTotalRefCount()); + return v; + } else { + LOG.debug("Closing SnapshotId {}. It is not being referenced anymore.", k); + // Close the instance, which also closes its DB handle. + try { + v.get().close(); + } catch (IOException ex) { + throw new IllegalStateException("Error while closing snapshot DB.", ex); + } + omMetrics.decNumSnapshotCacheSize(); + return null; + } + }); + return null; } /** diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java index 9406d74c5ff..e3de9653f1f 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java @@ -173,7 +173,7 @@ public void testGetHoldsReadLock(int numberOfLocks) throws IOException { @ParameterizedTest @ValueSource(ints = {0, 1, 5, 10}) @DisplayName("Tests lock() holds a write lock") - public void testGetHoldsWriteLock(int numberOfLocks) { + public void testLockHoldsWriteLock(int numberOfLocks) { clearInvocations(lock); for (int i = 0; i < numberOfLocks; i++) { snapshotCache.lock(); @@ -181,6 +181,18 @@ public void testGetHoldsWriteLock(int numberOfLocks) { verify(lock, times(numberOfLocks)).acquireResourceWriteLock(eq(SNAPSHOT_DB_LOCK)); } + @ParameterizedTest + @ValueSource(ints = {0, 1, 5, 10}) + @DisplayName("Tests lock(snapshotId) holds a write lock") + public void testLockHoldsWriteLockSnapshotId(int numberOfLocks) { + clearInvocations(lock); + UUID snapshotId = UUID.randomUUID(); + for (int i = 0; i < numberOfLocks; i++) { + snapshotCache.lock(snapshotId); + } + verify(lock, times(numberOfLocks)).acquireWriteLock(eq(SNAPSHOT_DB_LOCK), eq(snapshotId.toString())); + } + @Test @DisplayName("get() same entry twice yields one cache entry only") void testGetTwice() throws IOException { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
