This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 0480adf941 remove server invalidation from tablet cache (#5431)
0480adf941 is described below

commit 0480adf94116e2e98f5d64e9df3a1b8d8ef6e6a1
Author: Keith Turner <ktur...@apache.org>
AuthorDate: Thu Mar 27 11:52:44 2025 -0400

    remove server invalidation from tablet cache (#5431)
    
    The client side tablet cache supports invalidating all entries in the
    cache related a to tablet server.  This is usually called when client
    code sees an unexpected exception on a server.  There are two cases that
    could cause this.  In one case the tablet server is healthy and
    something like a user iterator caused an exception.  For this case
    invalidating everything in the cache related to the server may cause
    uneeded metadata reads.  The other case is that the tablet server is
    unhealthy and any interaction with it will fail. In this case
    invalidating everything in the cache makes sense. However, until that
    unhealthy server no longer has tablet locations set in the metadata
    table, the optimization to clear all servers from the cache is not going
    to help overall.
    
    So clearing servers in the cache is sometimes harmful and probably not
    really helpful.  Therefore this change removes the ability to clear
    everything for a server.  The code that used to call this was modified
    to clear the extents it was trying to access on the server, instead of
    everything for the server.
    
    Another reason to remove server invalidation is the cache supports
    removing entries where the tserver no longer has a lock.  This removal
    is lazy in that it happens when something tries to access the entry.
    
    One nice benefit of this is that clearing extents from the cache is
    always fast because the cache is indexed by extent.  However clearing a
    server required a full scan of the cache, which could be slow on a table
    with lots of tablets.
    
    
    Co-authored-by: Christopher Tubbs <ctubb...@apache.org>
---
 .../core/clientImpl/ClientTabletCache.java         |  5 ---
 .../core/clientImpl/ClientTabletCacheImpl.java     | 43 +---------------------
 .../core/clientImpl/ConditionalWriterImpl.java     |  9 +----
 .../core/clientImpl/RootClientTabletCache.java     |  5 ---
 .../core/clientImpl/SyncingClientTabletCache.java  |  5 ---
 .../TabletServerBatchReaderIterator.java           |  2 +-
 .../core/clientImpl/TabletServerBatchWriter.java   | 15 +++-----
 .../accumulo/core/clientImpl/ThriftScanner.java    |  3 +-
 .../metadata/MetadataCachedTabletObtainer.java     |  4 +-
 .../core/clientImpl/ClientTabletCacheImplTest.java | 20 ++++------
 .../core/clientImpl/RootClientTabletCacheTest.java |  1 -
 11 files changed, 20 insertions(+), 92 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java
index 05c018fae9..3224eb5e32 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java
@@ -180,11 +180,6 @@ public abstract class ClientTabletCache {
    */
   public abstract void invalidateCache();
 
-  /**
-   * Invalidate all metadata entries that point to server
-   */
-  public abstract void invalidateCache(ClientContext context, String server);
-
   public long getTabletHostingRequestCount() {
     return 0L;
   }
diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
index 5962c23e98..b1f0f913d3 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
@@ -101,7 +101,6 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
   protected final Text lastTabletRow;
 
   private final TreeSet<KeyExtent> badExtents = new TreeSet<>();
-  private final HashSet<String> badServers = new HashSet<>();
   private final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
   private final Lock rLock = rwLock.readLock();
   private final Lock wLock = rwLock.writeLock();
@@ -503,23 +502,6 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
     }
   }
 
-  @Override
-  public void invalidateCache(ClientContext context, String server) {
-
-    wLock.lock();
-    try {
-      badServers.add(server);
-    } finally {
-      wLock.unlock();
-    }
-
-    lockChecker.invalidateCache(server);
-
-    if (log.isTraceEnabled()) {
-      log.trace("queued invalidation for table={} server={}", tableId, server);
-    }
-  }
-
   @Override
   public void invalidateCache() {
     int invalidatedCount;
@@ -919,7 +901,7 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
       throws AccumuloSecurityException, AccumuloException, 
TableNotFoundException,
       InvalidTabletHostingRequestException {
 
-    if (badExtents.isEmpty() && badServers.isEmpty()) {
+    if (badExtents.isEmpty()) {
       return;
     }
 
@@ -928,7 +910,7 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
       if (!writeLockHeld) {
         rLock.unlock();
         wLock.lock();
-        if (badExtents.isEmpty() && badServers.isEmpty()) {
+        if (badExtents.isEmpty()) {
           return;
         }
       }
@@ -940,27 +922,6 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
         removeOverlapping(metaCache, be);
       }
 
-      if (!badServers.isEmpty()) {
-        int removedCount = 0;
-        var locationIterator = metaCache.values().iterator();
-        while (locationIterator.hasNext()) {
-          var cacheEntry = locationIterator.next();
-          if (cacheEntry.getTserverLocation().isPresent()
-              && 
badServers.contains(cacheEntry.getTserverLocation().orElseThrow())) {
-            locationIterator.remove();
-            lookups.add(cacheEntry.getExtent().toMetaRange());
-            removedCount++;
-          }
-        }
-
-        if (log.isTraceEnabled()) {
-          log.trace("Invalidated {} cache entries for table {} related to 
servers {}", removedCount,
-              tableId, badServers);
-        }
-
-        badServers.clear();
-      }
-
       lookups = Range.mergeOverlapping(lookups);
 
       Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<>();
diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java
 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java
index b804e8ab49..e7b87d3f07 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java
@@ -624,7 +624,7 @@ public class ConditionalWriterImpl implements 
ConditionalWriter {
     } catch (TApplicationException tae) {
       queueException(location, cmidToCm, new 
AccumuloServerException(location.toString(), tae));
     } catch (TException e) {
-      locator.invalidateCache(context, location.toString());
+      locator.invalidateCache(mutations.getMutations().keySet());
       invalidateSession(location, cmidToCm, sessionId);
     } catch (Exception e) {
       queueException(location, cmidToCm, e);
@@ -686,10 +686,6 @@ public class ConditionalWriterImpl implements 
ConditionalWriter {
 
     while (true) {
       if (!ServiceLock.isLockHeld(context.getZooCache(), lid)) {
-        // ACCUMULO-1152 added a tserver lock check to the tablet location 
cache, so this
-        // invalidation prevents future attempts to contact the
-        // tserver even its gone zombie and is still running w/o a lock
-        locator.invalidateCache(context, location.toString());
         log.trace("tablet server {} {} is dead, so no need to invalidate {}", 
location,
             sessionId.lockId, sessionId.sessionID);
         return;
@@ -705,7 +701,6 @@ public class ConditionalWriterImpl implements 
ConditionalWriter {
       } catch (TApplicationException tae) {
         throw new AccumuloServerException(location.toString(), tae);
       } catch (TException e) {
-        locator.invalidateCache(context, location.toString());
         log.trace("Failed to invalidate {} at {} {}", sessionId.sessionID, 
location,
             e.getMessage());
       }
@@ -716,9 +711,7 @@ public class ConditionalWriterImpl implements 
ConditionalWriter {
 
       sleepUninterruptibly(sleepTime, MILLISECONDS);
       sleepTime = Math.min(2 * sleepTime, MAX_SLEEP);
-
     }
-
   }
 
   private void invalidateSession(long sessionId, HostAndPort location) throws 
TException {
diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java
 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java
index 16eca25bc8..4eac34a7bb 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java
@@ -107,11 +107,6 @@ public class RootClientTabletCache extends 
ClientTabletCache {
     // no-op see class level javadoc
   }
 
-  @Override
-  public void invalidateCache(ClientContext context, String server) {
-    // no-op see class level javadoc
-  }
-
   @Override
   public void invalidateCache() {
     // no-op see class level javadoc
diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java
 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java
index 571d9704df..ffc2c0c548 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java
@@ -111,9 +111,4 @@ public class SyncingClientTabletCache extends 
ClientTabletCache {
   public void invalidateCache() {
     syncLocator().invalidateCache();
   }
-
-  @Override
-  public void invalidateCache(ClientContext context, String server) {
-    syncLocator().invalidateCache(context, server);
-  }
 }
diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java
 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java
index 2a1e786408..a7de7f5e76 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java
@@ -442,7 +442,7 @@ public class TabletServerBatchReaderIterator implements 
Iterator<Entry<Key,Value
             failures.putAll(unscanned);
           }
 
-          locator.invalidateCache(context, tsLocation);
+          locator.invalidateCache(tabletsRanges.keySet());
         }
         log.debug("IOException thrown", e);
 
diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java
 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java
index 505cf3569c..c1c4ef1a5d 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java
@@ -49,6 +49,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
 
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.AccumuloSecurityException;
@@ -919,14 +920,8 @@ public class TabletServerBatchWriter implements 
AutoCloseable {
         } catch (IOException e) {
           log.debug("failed to send mutations to {}", location, e);
 
-          HashSet<TableId> tables = new HashSet<>();
-          for (KeyExtent ke : mutationBatch.keySet()) {
-            tables.add(ke.tableId());
-          }
-
-          for (TableId table : tables) {
-            getLocator(table).invalidateCache(context, location);
-          }
+          
mutationBatch.keySet().stream().collect(Collectors.groupingBy(KeyExtent::tableId))
+              .forEach((k, v) -> getLocator(k).invalidateCache(v));
 
           failedMutations.add(tsm);
         } finally {
@@ -990,7 +985,7 @@ public class TabletServerBatchWriter implements 
AutoCloseable {
           // the write completed successfully so no need to close the session
           sessionCloser.clearSession();
 
-          // @formatter:off
+        // @formatter:off
             Map<KeyExtent,Long> failures = 
updateErrors.failedExtents.entrySet().stream().collect(toMap(
                             entry -> KeyExtent.fromThrift(entry.getKey()),
                             Entry::getValue
@@ -998,7 +993,7 @@ public class TabletServerBatchWriter implements 
AutoCloseable {
             // @formatter:on
           updatedConstraintViolations(updateErrors.violationSummaries.stream()
               .map(ConstraintViolationSummary::new).collect(toList()));
-          // @formatter:off
+        // @formatter:off
             
updateAuthorizationFailures(updateErrors.authorizationFailures.entrySet().stream().collect(toMap(
                             entry -> KeyExtent.fromThrift(entry.getKey()),
                             Entry::getValue
diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java
index f67d56077e..4d6c792c0d 100644
--- a/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java
+++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java
@@ -774,8 +774,7 @@ public class ThriftScanner {
                 e.getCause() != null && 
e.getCause().getClass().equals(InterruptedIOException.class)
                     && scanState.closeInitiated;
             if (!wasInterruptedAfterClose) {
-              
context.getTabletLocationCache(scanState.tableId).invalidateCache(context,
-                  addr.serverAddress);
+              
context.getTabletLocationCache(scanState.tableId).invalidateCache(addr.getExtent());
             }
           }
           error = "Scan failed, thrift error " + e.getClass().getName() + "  " 
+ e.getMessage()
diff --git 
a/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java
 
b/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java
index a14c8209f0..00a7739716 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java
@@ -147,7 +147,7 @@ public class MetadataCachedTabletObtainer implements 
CachedTabletObtainer {
       if (log.isTraceEnabled()) {
         log.trace("{} lookup failed", src.getExtent().tableId(), e);
       }
-      parent.invalidateCache(context, src.getTserverLocation().orElseThrow());
+      parent.invalidateCache(src.getExtent());
     }
 
     return null;
@@ -209,7 +209,7 @@ public class MetadataCachedTabletObtainer implements 
CachedTabletObtainer {
       }
     } catch (IOException e) {
       log.trace("lookupTablets failed server={}", tserver, e);
-      parent.invalidateCache(context, tserver);
+      parent.invalidateCache(tabletsRanges.keySet());
     } catch (AccumuloServerException e) {
       log.trace("lookupTablets failed server={}", tserver, e);
       throw e;
diff --git 
a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
 
b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
index 26abce5159..601dff11bb 100644
--- 
a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
+++ 
b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
@@ -521,7 +521,7 @@ public class ClientTabletCacheImplTest {
           tservers.get(src.getTserverLocation().orElseThrow());
 
       if (tablets == null) {
-        parent.invalidateCache(context, 
src.getTserverLocation().orElseThrow());
+        parent.invalidateCache(src.getExtent());
         return null;
       }
 
@@ -552,7 +552,7 @@ public class ClientTabletCacheImplTest {
       Map<KeyExtent,SortedMap<Key,Value>> tablets = tservers.get(tserver);
 
       if (tablets == null) {
-        parent.invalidateCache(context, tserver);
+        parent.invalidateCache(map.keySet());
         return list;
       }
 
@@ -619,10 +619,6 @@ public class ClientTabletCacheImplTest {
       return new CachedTablet(RootTable.EXTENT, rootTabletLoc, "1", 
TabletAvailability.HOSTED,
           false);
     }
-
-    @Override
-    public void invalidateCache(ClientContext context, String server) {}
-
   }
 
   static void createEmptyTablet(TServers tservers, String server, KeyExtent 
tablet) {
@@ -784,7 +780,7 @@ public class ClientTabletCacheImplTest {
 
     // simulate a server failure
     setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, tab1e21, 
"tserver9");
-    tab1TabletCache.invalidateCache(context, "tserver8");
+    tab1TabletCache.invalidateCache(tab1e21);
     locateTabletTest(tab1TabletCache, "r1", tab1e22, "tserver6");
     locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver9");
     locateTabletTest(tab1TabletCache, "a", tab1e1, "tserver4");
@@ -792,9 +788,9 @@ public class ClientTabletCacheImplTest {
     // simulate all servers failing
     deleteServer(tservers, "tserver1");
     deleteServer(tservers, "tserver2");
-    tab1TabletCache.invalidateCache(context, "tserver4");
-    tab1TabletCache.invalidateCache(context, "tserver6");
-    tab1TabletCache.invalidateCache(context, "tserver9");
+    tab1TabletCache.invalidateCache(tab1e22);
+    tab1TabletCache.invalidateCache(tab1e21);
+    tab1TabletCache.invalidateCache(tab1e1);
 
     locateTabletTest(tab1TabletCache, "r1", null, null);
     locateTabletTest(tab1TabletCache, "h", null, null);
@@ -849,7 +845,7 @@ public class ClientTabletCacheImplTest {
 
     // simulate metadata and regular server down and the reassigned
     deleteServer(tservers, "tserver5");
-    tab1TabletCache.invalidateCache(context, "tserver7");
+    tab1TabletCache.invalidateCache(tab1e1);
     locateTabletTest(tab1TabletCache, "a", null, null);
     locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver8");
     locateTabletTest(tab1TabletCache, "r", tab1e22, "tserver9");
@@ -861,7 +857,7 @@ public class ClientTabletCacheImplTest {
     locateTabletTest(tab1TabletCache, "a", tab1e1, "tserver7");
     locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver8");
     locateTabletTest(tab1TabletCache, "r", tab1e22, "tserver9");
-    tab1TabletCache.invalidateCache(context, "tserver7");
+    tab1TabletCache.invalidateCache(tab1e1);
     setLocation(tservers, "tserver10", mte1, tab1e1, "tserver2");
     locateTabletTest(tab1TabletCache, "a", tab1e1, "tserver2");
     locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver8");
diff --git 
a/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java
 
b/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java
index 37412f0e21..eed53e5212 100644
--- 
a/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java
+++ 
b/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java
@@ -51,7 +51,6 @@ public class RootClientTabletCacheTest {
   public void testInvalidateCache_Noop() {
     var rtl = new RootClientTabletCache(lockChecker);
     // it's not expected that any of the validate functions will do anything 
with the mock objects
-    rtl.invalidateCache(context, "server");
     rtl.invalidateCache(RootTable.EXTENT);
     rtl.invalidateCache();
     rtl.invalidateCache(List.of(RootTable.EXTENT));

Reply via email to