This is an automated email from the ASF dual-hosted git repository.

domgarguilo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 7e843611d2 Improve locking mechanism in ClientTabletCacheImpl (#5538)
7e843611d2 is described below

commit 7e843611d21a7c39b6846e56c434f8b4172e5f0c
Author: Dom G. <domgargu...@apache.org>
AuthorDate: Tue Jun 3 16:22:30 2025 -0400

    Improve locking mechanism in ClientTabletCacheImpl (#5538)
    
    * Improve locking mechanism in ClientTabletCacheImpl
    * Make ClientTabletCacheImplTest more concurrently strenuous
---
 .../core/clientImpl/ClientTabletCacheImpl.java     | 31 +++++------
 .../core/clientImpl/ClientTabletCacheImplTest.java | 62 +++++++++++++---------
 2 files changed, 50 insertions(+), 43 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
index 803188b5a2..9d7345998d 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
@@ -690,25 +690,20 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
     metadataRow.append(row.getBytes(), 0, row.getLength());
     CachedTablet ptl = parent.findTablet(context, metadataRow, false, 
LocationNeed.REQUIRED);
 
-    if (ptl != null) {
-      // Only allow a single lookup at time per parent tablet. For example if 
a tables tablets are
-      // all stored in three metadata tablets, then that table could have up 
to three concurrent
-      // metadata lookups.
-      Timer timer = Timer.startNew();
-      try (var unused = lookupLocks.lock(ptl.getExtent())) {
-        // See if entry was added to cache by another thread while we were 
waiting on the lock
-        var cached = findTabletInCache(row);
-        if (cached != null && cached.getCreationTimer().startedAfter(timer)) {
-          // This cache entry was added after we started waiting on the lock 
so lets use it and not
-          // go to the metadata table. This means another thread was holding 
the lock and doing
-          // metadata lookups when we requested the lock.
-          return;
-        }
-        // Lookup tablets in metadata table and update cache. Also updating 
the cache while holding
-        // the lock is important as it ensures other threads that are waiting 
on the lock will see
-        // what this thread found and may be able to avoid metadata lookups.
-        lookupTablet(context, lcSession, ptl, metadataRow);
+    if (ptl == null) {
+      return;
+    }
+    // detect if another thread populated cache while waiting for lock
+    CachedTablet before = findTabletInCache(row);
+    try (var unused = lookupLocks.lock(ptl.getExtent())) {
+      CachedTablet after = findTabletInCache(row);
+      if (after != null && after != before && lcSession.checkLock(after) != 
null) {
+        return;
       }
+      // Lookup tablets in metadata table and update cache. Also updating the 
cache while holding
+      // the lock is important as it ensures other threads that are waiting on 
the lock will see
+      // what this thread found and may be able to avoid metadata lookups.
+      lookupTablet(context, lcSession, ptl, metadataRow);
     }
   }
 
diff --git 
a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
 
b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
index 87855d2e12..372af80083 100644
--- 
a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
+++ 
b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
@@ -19,6 +19,7 @@
 package org.apache.accumulo.core.clientImpl;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.accumulo.core.util.LazySingletons.RANDOM;
 import static org.easymock.EasyMock.replay;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -1986,45 +1987,56 @@ public class ClientTabletCacheImplTest {
     setLocation(tservers, "tserver3", mte2, ke3, "tserver9");
 
     var executor = Executors.newCachedThreadPool();
-    List<Future<CachedTablet>> futures = new ArrayList<>();
+    final int lookupCount = 128;
+    final int roundCount = 8;
 
-    // start 64 threads all trying to lookup data in the cache, should see 
only two threads do a
-    // concurrent lookup in the metadata table and no more or less.
-    List<String> rowsToLookup = new ArrayList<>();
+    List<Future<CachedTablet>> futures = new ArrayList<>(roundCount * 
lookupCount);
 
-    for (int i = 0; i < 64; i++) {
-      String lookup = (char) ('a' + (i % 26)) + "";
-      rowsToLookup.add(lookup);
-    }
+    // multiple rounds to increase the chance of contention
+    for (int round = 0; round < roundCount; round++) {
 
-    Collections.shuffle(rowsToLookup);
-
-    for (var lookup : rowsToLookup) {
-      var future = executor.submit(() -> {
-        var loc = metaCache.findTablet(context, new Text(lookup), false, 
LocationNeed.REQUIRED);
-        if (lookup.compareTo("m") <= 0) {
-          assertEquals("tserver7", loc.getTserverLocation().orElseThrow());
-        } else if (lookup.compareTo("q") <= 0) {
-          assertEquals("tserver8", loc.getTserverLocation().orElseThrow());
-        } else {
-          assertEquals("tserver9", loc.getTserverLocation().orElseThrow());
-        }
-        return loc;
-      });
-      futures.add(future);
+      // start a bunch of threads all trying to lookup data in the cache
+      // should see exactly 2 threads doing metadata lookups at a time
+      List<String> rowsToLookup = new ArrayList<>(lookupCount);
+
+      for (int i = 0; i < lookupCount; i++) {
+        String lookup = (char) ('a' + (i % 26)) + "";
+        rowsToLookup.add(lookup);
+      }
+
+      Collections.shuffle(rowsToLookup, RANDOM.get());
+
+      for (var lookup : rowsToLookup) {
+        var future = executor.submit(() -> {
+          if (RANDOM.get().nextInt(10) < 3) {
+            Thread.yield();
+          }
+          var loc = metaCache.findTablet(context, new Text(lookup), false, 
LocationNeed.REQUIRED);
+          if (lookup.compareTo("m") <= 0) {
+            assertEquals("tserver7", loc.getTserverLocation().orElseThrow());
+          } else if (lookup.compareTo("q") <= 0) {
+            assertEquals("tserver8", loc.getTserverLocation().orElseThrow());
+          } else {
+            assertEquals("tserver9", loc.getTserverLocation().orElseThrow());
+          }
+          return loc;
+        });
+        futures.add(future);
+      }
     }
 
     for (var future : futures) {
       assertNotNull(future.get());
     }
 
-    assertTrue(sawTwoActive.get());
+    assertTrue(sawTwoActive.get(), "Expected to see exactly two lookups.");
     // The second metadata tablet (mte2) contains two user tablets (ke2 and 
ke3). Depending on which
     // of these two user tablets is looked up in the metadata table first will 
see a total of 2 or 3
     // lookups. If the location of ke2 is looked up first then it will get the 
locations of ke2 and
     // ke3 from mte2 and put them in the cache. If the location of ke3 is 
looked up first then it
     // will only get the location of ke3 from mte2 and not ke2.
-    assertTrue(lookups.size() == 2 || lookups.size() == 3, lookups::toString);
+    assertTrue(lookups.size() == 2 || lookups.size() == 3,
+        "Expected 2 or 3 lookups, got " + lookups.size() + " : " + lookups);
     assertEquals(1, lookups.stream().filter(metadataExtent -> 
metadataExtent.equals(mte1)).count(),
         lookups::toString);
     var mte2Lookups =

Reply via email to