This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push: new 0480adf941 remove server invalidation from tablet cache (#5431) 0480adf941 is described below commit 0480adf94116e2e98f5d64e9df3a1b8d8ef6e6a1 Author: Keith Turner <ktur...@apache.org> AuthorDate: Thu Mar 27 11:52:44 2025 -0400 remove server invalidation from tablet cache (#5431) The client side tablet cache supports invalidating all entries in the cache related a to tablet server. This is usually called when client code sees an unexpected exception on a server. There are two cases that could cause this. In one case the tablet server is healthy and something like a user iterator caused an exception. For this case invalidating everything in the cache related to the server may cause uneeded metadata reads. The other case is that the tablet server is unhealthy and any interaction with it will fail. In this case invalidating everything in the cache makes sense. However, until that unhealthy server no longer has tablet locations set in the metadata table, the optimization to clear all servers from the cache is not going to help overall. So clearing servers in the cache is sometimes harmful and probably not really helpful. Therefore this change removes the ability to clear everything for a server. The code that used to call this was modified to clear the extents it was trying to access on the server, instead of everything for the server. Another reason to remove server invalidation is the cache supports removing entries where the tserver no longer has a lock. This removal is lazy in that it happens when something tries to access the entry. One nice benefit of this is that clearing extents from the cache is always fast because the cache is indexed by extent. However clearing a server required a full scan of the cache, which could be slow on a table with lots of tablets. Co-authored-by: Christopher Tubbs <ctubb...@apache.org> --- .../core/clientImpl/ClientTabletCache.java | 5 --- .../core/clientImpl/ClientTabletCacheImpl.java | 43 +--------------------- .../core/clientImpl/ConditionalWriterImpl.java | 9 +---- .../core/clientImpl/RootClientTabletCache.java | 5 --- .../core/clientImpl/SyncingClientTabletCache.java | 5 --- .../TabletServerBatchReaderIterator.java | 2 +- .../core/clientImpl/TabletServerBatchWriter.java | 15 +++----- .../accumulo/core/clientImpl/ThriftScanner.java | 3 +- .../metadata/MetadataCachedTabletObtainer.java | 4 +- .../core/clientImpl/ClientTabletCacheImplTest.java | 20 ++++------ .../core/clientImpl/RootClientTabletCacheTest.java | 1 - 11 files changed, 20 insertions(+), 92 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java index 05c018fae9..3224eb5e32 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java @@ -180,11 +180,6 @@ public abstract class ClientTabletCache { */ public abstract void invalidateCache(); - /** - * Invalidate all metadata entries that point to server - */ - public abstract void invalidateCache(ClientContext context, String server); - public long getTabletHostingRequestCount() { return 0L; } diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java index 5962c23e98..b1f0f913d3 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java @@ -101,7 +101,6 @@ public class ClientTabletCacheImpl extends ClientTabletCache { protected final Text lastTabletRow; private final TreeSet<KeyExtent> badExtents = new TreeSet<>(); - private final HashSet<String> badServers = new HashSet<>(); private final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(); private final Lock rLock = rwLock.readLock(); private final Lock wLock = rwLock.writeLock(); @@ -503,23 +502,6 @@ public class ClientTabletCacheImpl extends ClientTabletCache { } } - @Override - public void invalidateCache(ClientContext context, String server) { - - wLock.lock(); - try { - badServers.add(server); - } finally { - wLock.unlock(); - } - - lockChecker.invalidateCache(server); - - if (log.isTraceEnabled()) { - log.trace("queued invalidation for table={} server={}", tableId, server); - } - } - @Override public void invalidateCache() { int invalidatedCount; @@ -919,7 +901,7 @@ public class ClientTabletCacheImpl extends ClientTabletCache { throws AccumuloSecurityException, AccumuloException, TableNotFoundException, InvalidTabletHostingRequestException { - if (badExtents.isEmpty() && badServers.isEmpty()) { + if (badExtents.isEmpty()) { return; } @@ -928,7 +910,7 @@ public class ClientTabletCacheImpl extends ClientTabletCache { if (!writeLockHeld) { rLock.unlock(); wLock.lock(); - if (badExtents.isEmpty() && badServers.isEmpty()) { + if (badExtents.isEmpty()) { return; } } @@ -940,27 +922,6 @@ public class ClientTabletCacheImpl extends ClientTabletCache { removeOverlapping(metaCache, be); } - if (!badServers.isEmpty()) { - int removedCount = 0; - var locationIterator = metaCache.values().iterator(); - while (locationIterator.hasNext()) { - var cacheEntry = locationIterator.next(); - if (cacheEntry.getTserverLocation().isPresent() - && badServers.contains(cacheEntry.getTserverLocation().orElseThrow())) { - locationIterator.remove(); - lookups.add(cacheEntry.getExtent().toMetaRange()); - removedCount++; - } - } - - if (log.isTraceEnabled()) { - log.trace("Invalidated {} cache entries for table {} related to servers {}", removedCount, - tableId, badServers); - } - - badServers.clear(); - } - lookups = Range.mergeOverlapping(lookups); Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<>(); diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java index b804e8ab49..e7b87d3f07 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java @@ -624,7 +624,7 @@ public class ConditionalWriterImpl implements ConditionalWriter { } catch (TApplicationException tae) { queueException(location, cmidToCm, new AccumuloServerException(location.toString(), tae)); } catch (TException e) { - locator.invalidateCache(context, location.toString()); + locator.invalidateCache(mutations.getMutations().keySet()); invalidateSession(location, cmidToCm, sessionId); } catch (Exception e) { queueException(location, cmidToCm, e); @@ -686,10 +686,6 @@ public class ConditionalWriterImpl implements ConditionalWriter { while (true) { if (!ServiceLock.isLockHeld(context.getZooCache(), lid)) { - // ACCUMULO-1152 added a tserver lock check to the tablet location cache, so this - // invalidation prevents future attempts to contact the - // tserver even its gone zombie and is still running w/o a lock - locator.invalidateCache(context, location.toString()); log.trace("tablet server {} {} is dead, so no need to invalidate {}", location, sessionId.lockId, sessionId.sessionID); return; @@ -705,7 +701,6 @@ public class ConditionalWriterImpl implements ConditionalWriter { } catch (TApplicationException tae) { throw new AccumuloServerException(location.toString(), tae); } catch (TException e) { - locator.invalidateCache(context, location.toString()); log.trace("Failed to invalidate {} at {} {}", sessionId.sessionID, location, e.getMessage()); } @@ -716,9 +711,7 @@ public class ConditionalWriterImpl implements ConditionalWriter { sleepUninterruptibly(sleepTime, MILLISECONDS); sleepTime = Math.min(2 * sleepTime, MAX_SLEEP); - } - } private void invalidateSession(long sessionId, HostAndPort location) throws TException { diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java index 16eca25bc8..4eac34a7bb 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java @@ -107,11 +107,6 @@ public class RootClientTabletCache extends ClientTabletCache { // no-op see class level javadoc } - @Override - public void invalidateCache(ClientContext context, String server) { - // no-op see class level javadoc - } - @Override public void invalidateCache() { // no-op see class level javadoc diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java index 571d9704df..ffc2c0c548 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java @@ -111,9 +111,4 @@ public class SyncingClientTabletCache extends ClientTabletCache { public void invalidateCache() { syncLocator().invalidateCache(); } - - @Override - public void invalidateCache(ClientContext context, String server) { - syncLocator().invalidateCache(context, server); - } } diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java index 2a1e786408..a7de7f5e76 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java @@ -442,7 +442,7 @@ public class TabletServerBatchReaderIterator implements Iterator<Entry<Key,Value failures.putAll(unscanned); } - locator.invalidateCache(context, tsLocation); + locator.invalidateCache(tabletsRanges.keySet()); } log.debug("IOException thrown", e); diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java index 505cf3569c..c1c4ef1a5d 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java @@ -49,6 +49,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.client.AccumuloSecurityException; @@ -919,14 +920,8 @@ public class TabletServerBatchWriter implements AutoCloseable { } catch (IOException e) { log.debug("failed to send mutations to {}", location, e); - HashSet<TableId> tables = new HashSet<>(); - for (KeyExtent ke : mutationBatch.keySet()) { - tables.add(ke.tableId()); - } - - for (TableId table : tables) { - getLocator(table).invalidateCache(context, location); - } + mutationBatch.keySet().stream().collect(Collectors.groupingBy(KeyExtent::tableId)) + .forEach((k, v) -> getLocator(k).invalidateCache(v)); failedMutations.add(tsm); } finally { @@ -990,7 +985,7 @@ public class TabletServerBatchWriter implements AutoCloseable { // the write completed successfully so no need to close the session sessionCloser.clearSession(); - // @formatter:off + // @formatter:off Map<KeyExtent,Long> failures = updateErrors.failedExtents.entrySet().stream().collect(toMap( entry -> KeyExtent.fromThrift(entry.getKey()), Entry::getValue @@ -998,7 +993,7 @@ public class TabletServerBatchWriter implements AutoCloseable { // @formatter:on updatedConstraintViolations(updateErrors.violationSummaries.stream() .map(ConstraintViolationSummary::new).collect(toList())); - // @formatter:off + // @formatter:off updateAuthorizationFailures(updateErrors.authorizationFailures.entrySet().stream().collect(toMap( entry -> KeyExtent.fromThrift(entry.getKey()), Entry::getValue diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java index f67d56077e..4d6c792c0d 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java @@ -774,8 +774,7 @@ public class ThriftScanner { e.getCause() != null && e.getCause().getClass().equals(InterruptedIOException.class) && scanState.closeInitiated; if (!wasInterruptedAfterClose) { - context.getTabletLocationCache(scanState.tableId).invalidateCache(context, - addr.serverAddress); + context.getTabletLocationCache(scanState.tableId).invalidateCache(addr.getExtent()); } } error = "Scan failed, thrift error " + e.getClass().getName() + " " + e.getMessage() diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java b/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java index a14c8209f0..00a7739716 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java @@ -147,7 +147,7 @@ public class MetadataCachedTabletObtainer implements CachedTabletObtainer { if (log.isTraceEnabled()) { log.trace("{} lookup failed", src.getExtent().tableId(), e); } - parent.invalidateCache(context, src.getTserverLocation().orElseThrow()); + parent.invalidateCache(src.getExtent()); } return null; @@ -209,7 +209,7 @@ public class MetadataCachedTabletObtainer implements CachedTabletObtainer { } } catch (IOException e) { log.trace("lookupTablets failed server={}", tserver, e); - parent.invalidateCache(context, tserver); + parent.invalidateCache(tabletsRanges.keySet()); } catch (AccumuloServerException e) { log.trace("lookupTablets failed server={}", tserver, e); throw e; diff --git a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java index 26abce5159..601dff11bb 100644 --- a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java +++ b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java @@ -521,7 +521,7 @@ public class ClientTabletCacheImplTest { tservers.get(src.getTserverLocation().orElseThrow()); if (tablets == null) { - parent.invalidateCache(context, src.getTserverLocation().orElseThrow()); + parent.invalidateCache(src.getExtent()); return null; } @@ -552,7 +552,7 @@ public class ClientTabletCacheImplTest { Map<KeyExtent,SortedMap<Key,Value>> tablets = tservers.get(tserver); if (tablets == null) { - parent.invalidateCache(context, tserver); + parent.invalidateCache(map.keySet()); return list; } @@ -619,10 +619,6 @@ public class ClientTabletCacheImplTest { return new CachedTablet(RootTable.EXTENT, rootTabletLoc, "1", TabletAvailability.HOSTED, false); } - - @Override - public void invalidateCache(ClientContext context, String server) {} - } static void createEmptyTablet(TServers tservers, String server, KeyExtent tablet) { @@ -784,7 +780,7 @@ public class ClientTabletCacheImplTest { // simulate a server failure setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, tab1e21, "tserver9"); - tab1TabletCache.invalidateCache(context, "tserver8"); + tab1TabletCache.invalidateCache(tab1e21); locateTabletTest(tab1TabletCache, "r1", tab1e22, "tserver6"); locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver9"); locateTabletTest(tab1TabletCache, "a", tab1e1, "tserver4"); @@ -792,9 +788,9 @@ public class ClientTabletCacheImplTest { // simulate all servers failing deleteServer(tservers, "tserver1"); deleteServer(tservers, "tserver2"); - tab1TabletCache.invalidateCache(context, "tserver4"); - tab1TabletCache.invalidateCache(context, "tserver6"); - tab1TabletCache.invalidateCache(context, "tserver9"); + tab1TabletCache.invalidateCache(tab1e22); + tab1TabletCache.invalidateCache(tab1e21); + tab1TabletCache.invalidateCache(tab1e1); locateTabletTest(tab1TabletCache, "r1", null, null); locateTabletTest(tab1TabletCache, "h", null, null); @@ -849,7 +845,7 @@ public class ClientTabletCacheImplTest { // simulate metadata and regular server down and the reassigned deleteServer(tservers, "tserver5"); - tab1TabletCache.invalidateCache(context, "tserver7"); + tab1TabletCache.invalidateCache(tab1e1); locateTabletTest(tab1TabletCache, "a", null, null); locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver8"); locateTabletTest(tab1TabletCache, "r", tab1e22, "tserver9"); @@ -861,7 +857,7 @@ public class ClientTabletCacheImplTest { locateTabletTest(tab1TabletCache, "a", tab1e1, "tserver7"); locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver8"); locateTabletTest(tab1TabletCache, "r", tab1e22, "tserver9"); - tab1TabletCache.invalidateCache(context, "tserver7"); + tab1TabletCache.invalidateCache(tab1e1); setLocation(tservers, "tserver10", mte1, tab1e1, "tserver2"); locateTabletTest(tab1TabletCache, "a", tab1e1, "tserver2"); locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver8"); diff --git a/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java b/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java index 37412f0e21..eed53e5212 100644 --- a/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java +++ b/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java @@ -51,7 +51,6 @@ public class RootClientTabletCacheTest { public void testInvalidateCache_Noop() { var rtl = new RootClientTabletCache(lockChecker); // it's not expected that any of the validate functions will do anything with the mock objects - rtl.invalidateCache(context, "server"); rtl.invalidateCache(RootTable.EXTENT); rtl.invalidateCache(); rtl.invalidateCache(List.of(RootTable.EXTENT));