This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch elasticity
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/elasticity by this push:
     new f586e7f9c9 avoid unnecessary metadata reads in client tablet cache 
(#4432)
f586e7f9c9 is described below

commit f586e7f9c9a82c0f01cba1ac279a7d583c7be3c4
Author: Keith Turner <ktur...@apache.org>
AuthorDate: Fri Apr 5 12:05:21 2024 -0400

    avoid unnecessary metadata reads in client tablet cache (#4432)
    
    For ondemand tablets the client tablet cache caches tablets w/o a
    location.  There was a bug fixed in #4280 where the cache would do a
    metadata table lookup for each mutation when tablets had no location.
    The fix in #4280 only partially fixed the problem, after that change
    more metadata lookups than needed were still being done.
    
    Also there was a bug with the batchscanner that #4280 did not address.
    Before this change when tablets had no location, the batch scanner would
do a metadata lookup for each range passed to the batch scanner (well
the client tablet cache would do these metadata lookups on behalf of the
    batch scanner).  For example before this change if the batch scanner was
    given 10K ranges that all fell in a single tablet w/o a location, it
    would do 10K metadata lookups.  After this change for that situation it
    will do a single metadata lookup.
    
    This change minimizes the metadata lookups done by the batch writer and
    batch scanner.  The fix is to make sure that cached entries populated by
    looking up one range or mutation are used by subsequent range or
    mutation lookups, even if there is no location present. This is done by
    always reusing cache entries that were created after work started on a
    batch of mutations or ranges.  Cache entries w/o a location that existed
    before work started on a batch are ignored.  By reusing cache entries
    created after starting work on a batch we minimize metadata lookups.
    
    A test was also added to ensure the client tablet cache does not do
    excessive metadata table lookups.  If this test had existed, it would
    have caught the problem.
---
 .../core/clientImpl/ClientTabletCache.java         |   8 +-
 .../core/clientImpl/ClientTabletCacheImpl.java     |  66 ++++++----
 .../core/clientImpl/ClientTabletCacheImplTest.java | 137 +++++++++++++++++++++
 3 files changed, 181 insertions(+), 30 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java
index 2564fb0063..a31ca2418b 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java
@@ -20,7 +20,6 @@ package org.apache.accumulo.core.clientImpl;
 
 import static com.google.common.base.Preconditions.checkArgument;
 
-import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -46,6 +45,7 @@ import org.apache.accumulo.core.singletons.SingletonManager;
 import org.apache.accumulo.core.singletons.SingletonService;
 import org.apache.accumulo.core.util.Interner;
 import org.apache.accumulo.core.util.UtilWaitThread;
+import org.apache.accumulo.core.util.time.NanoTime;
 import org.apache.hadoop.io.Text;
 
 import com.google.common.base.Preconditions;
@@ -311,7 +311,7 @@ public abstract class ClientTabletCache {
     private final TabletAvailability availability;
     private final boolean hostingRequested;
 
-    private final Long creationTime = System.nanoTime();
+    private final NanoTime creationTime = NanoTime.now();
 
     public CachedTablet(KeyExtent tablet_extent, String tablet_location, 
String session,
         TabletAvailability availability, boolean hostingRequested) {
@@ -392,8 +392,8 @@ public abstract class ClientTabletCache {
       return this.availability;
     }
 
-    public Duration getAge() {
-      return Duration.ofNanos(System.nanoTime() - creationTime);
+    public NanoTime getCreationTime() {
+      return creationTime;
     }
 
     public boolean wasHostingRequested() {
diff --git 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
index 10fb3aa21e..eb66665225 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
@@ -60,6 +60,7 @@ import org.apache.accumulo.core.trace.TraceUtil;
 import org.apache.accumulo.core.util.OpTimer;
 import org.apache.accumulo.core.util.Pair;
 import org.apache.accumulo.core.util.TextUtil;
+import org.apache.accumulo.core.util.time.NanoTime;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparator;
 import org.slf4j.Logger;
@@ -234,23 +235,17 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
 
       wLock.lock();
       try {
-        CachedTablet lastTablet = null;
+        // Want to ignore any entries in the cache w/o a location that were 
created before the
+        // following time. Entries created after the following time may have 
been populated by the
+        // following loop, and we want to use those.
+        var cacheCutoff = NanoTime.now();
+
         for (T mutation : notInCache) {
 
           row.set(mutation.getRow());
 
-          // ELASTICITY_TODO using lastTablet avoids doing a metadata table 
lookup per mutation.
-          // However this still does at least one metadata lookup per tablet. 
This is not as good as
-          // the pre-elasticity code that would lookup N tablets at once and 
use them to bin
-          // mutations. So there is further room for improvement in the way 
this code interacts with
-          // cache and metadata table.
-          CachedTablet tl;
-          if (lastTablet != null && lastTablet.getExtent().contains(row)) {
-            tl = lastTablet;
-          } else {
-            tl = _findTablet(context, row, false, false, false, lcSession, 
LocationNeed.REQUIRED);
-            lastTablet = tl;
-          }
+          CachedTablet tl = _findTablet(context, row, false, false, false, 
lcSession,
+              LocationNeed.REQUIRED, cacheCutoff);
 
           if (!addMutation(binnedMutations, mutation, tl, lcSession)) {
             failures.add(mutation);
@@ -331,6 +326,11 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
     List<Range> failures = new ArrayList<>();
     List<CachedTablet> cachedTablets = new ArrayList<>();
 
+    // Use anything in the cache w/o a location populated after this point in 
time. Cache entries
+    // w/o a location created before the following time should be ignored and 
the metadata table
+    // consulted.
+    var cacheCutoff = NanoTime.now();
+
     l1: for (Range range : ranges) {
 
       cachedTablets.clear();
@@ -348,7 +348,8 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
       if (useCache) {
         tl = lcSession.checkLock(findTabletInCache(startRow));
       } else {
-        tl = _findTablet(context, startRow, false, false, false, lcSession, 
locationNeed);
+        tl = _findTablet(context, startRow, false, false, false, lcSession, 
locationNeed,
+            cacheCutoff);
       }
 
       if (tl == null) {
@@ -358,6 +359,7 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
 
       cachedTablets.add(tl);
 
+      // a range may extend over multiple tablets, look for additional tablets 
that overlap the range
       while (tl.getExtent().endRow() != null
           && !range.afterEndKey(new 
Key(tl.getExtent().endRow()).followingKey(PartialKey.ROW))) {
         if (useCache) {
@@ -366,7 +368,7 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
           tl = lcSession.checkLock(findTabletInCache(row));
         } else {
           tl = _findTablet(context, tl.getExtent().endRow(), true, false, 
false, lcSession,
-              locationNeed);
+              locationNeed, cacheCutoff);
         }
 
         if (tl == null) {
@@ -560,7 +562,8 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
     }
 
     LockCheckerSession lcSession = new LockCheckerSession();
-    CachedTablet tl = _findTablet(context, row, skipRow, false, true, 
lcSession, locationNeed);
+    CachedTablet tl =
+        _findTablet(context, row, skipRow, false, true, lcSession, 
locationNeed, NanoTime.now());
 
     if (timer != null) {
       timer.stop();
@@ -610,14 +613,19 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
 
       var currTablet = extent;
 
+      // Use anything in the cache w/o a location populated after this point 
in time. Cache entries
+      // w/o a location created before the following time should be ignored 
and the metadata table
+      // consulted.
+      var cacheCutoff = NanoTime.now();
+
       for (int i = 0; i < hostAheadCount; i++) {
         if (currTablet.endRow() == null || hostAheadRange
             .afterEndKey(new 
Key(currTablet.endRow()).followingKey(PartialKey.ROW))) {
           break;
         }
 
-        CachedTablet followingTablet =
-            _findTablet(context, currTablet.endRow(), true, false, true, 
lcSession, locationNeed);
+        CachedTablet followingTablet = _findTablet(context, 
currTablet.endRow(), true, false, true,
+            lcSession, locationNeed, cacheCutoff);
 
         if (followingTablet == null) {
           break;
@@ -679,14 +687,14 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
 
     List<TKeyExtent> extentsToBringOnline = new ArrayList<>();
     for (var cachedTablet : tabletsWithNoLocation) {
-      if (cachedTablet.getAge().compareTo(STALE_DURATION) < 0) {
+      if (cachedTablet.getCreationTime().elapsed().compareTo(STALE_DURATION) < 
0) {
         if (cachedTablet.getAvailability() == TabletAvailability.ONDEMAND) {
           if (!cachedTablet.wasHostingRequested()) {
             extentsToBringOnline.add(cachedTablet.getExtent().toThrift());
             log.trace("requesting ondemand tablet to be hosted {}", 
cachedTablet.getExtent());
           } else {
             log.trace("ignoring ondemand tablet that already has a hosting 
request in place {} {}",
-                cachedTablet.getExtent(), cachedTablet.getAge());
+                cachedTablet.getExtent(), 
cachedTablet.getCreationTime().elapsed());
           }
         } else if (cachedTablet.getAvailability() == 
TabletAvailability.UNHOSTED) {
           throw new InvalidTabletHostingRequestException("Extent " + 
cachedTablet.getExtent()
@@ -855,10 +863,15 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
     return null;
   }
 
+  /**
+   * @param cacheCutoff Tablets w/o locations are cached. When LocationNeed is 
REQUIRED, this cut
+   *        off is used to determine if cached entries w/o a location should 
be used or if we should
+   *        instead ignore them and reread the tablet information from the 
metadata table.
+   */
   protected CachedTablet _findTablet(ClientContext context, Text row, boolean 
skipRow,
-      boolean retry, boolean lock, LockCheckerSession lcSession, LocationNeed 
locationNeed)
-      throws AccumuloException, AccumuloSecurityException, 
TableNotFoundException,
-      InvalidTabletHostingRequestException {
+      boolean retry, boolean lock, LockCheckerSession lcSession, LocationNeed 
locationNeed,
+      NanoTime cacheCutoff) throws AccumuloException, 
AccumuloSecurityException,
+      TableNotFoundException, InvalidTabletHostingRequestException {
 
     if (skipRow) {
       row = new Text(row);
@@ -878,9 +891,10 @@ public class ClientTabletCacheImpl extends 
ClientTabletCache {
       tl = processInvalidatedAndCheckLock(context, lcSession, row);
     }
 
-    if (tl == null
-        || (locationNeed == LocationNeed.REQUIRED && 
tl.getTserverLocation().isEmpty())) {
-      // not in cache, so obtain info
+    if (tl == null || (locationNeed == LocationNeed.REQUIRED && 
tl.getTserverLocation().isEmpty()
+        && tl.getCreationTime().compareTo(cacheCutoff) < 0)) {
+      // not in cache OR the cached entry was created before the cut off time, 
so obtain info from
+      // metadata table
       if (lock) {
         wLock.lock();
         try {
diff --git 
a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
 
b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
index 4b13ac638a..57350292ea 100644
--- 
a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
+++ 
b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
@@ -25,6 +25,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -36,6 +37,7 @@ import java.util.Map.Entry;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
+import java.util.function.BiConsumer;
 
 import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.client.TableNotFoundException;
@@ -493,20 +495,25 @@ public class ClientTabletCacheImplTest {
 
   static class TServers {
     private final Map<String,Map<KeyExtent,SortedMap<Key,Value>>> tservers = 
new HashMap<>();
+    private BiConsumer<CachedTablet,Text> lookupConsumer = (cachedTablet, row) 
-> {};
   }
 
   static class TestCachedTabletObtainer implements CachedTabletObtainer {
 
     private final Map<String,Map<KeyExtent,SortedMap<Key,Value>>> tservers;
+    private final BiConsumer<CachedTablet,Text> lookupConsumer;
 
     TestCachedTabletObtainer(TServers tservers) {
       this.tservers = tservers.tservers;
+      this.lookupConsumer = tservers.lookupConsumer;
     }
 
     @Override
     public CachedTablets lookupTablet(ClientContext context, CachedTablet src, 
Text row,
         Text stopRow, ClientTabletCache parent) {
 
+      lookupConsumer.accept(src, row);
+
       Map<KeyExtent,SortedMap<Key,Value>> tablets =
           tservers.get(src.getTserverLocation().orElseThrow());
 
@@ -1835,4 +1842,134 @@ public class ClientTabletCacheImplTest {
 
   }
 
+  /**
+   * This test ensures the cache does not query the metadata table more than 
is needed. This
+   * behavior is very important for performance.
+   */
+  @Test
+  public void testLookupCounts() throws Exception {
+
+    List<KeyExtent> lookups = new ArrayList<>();
+    TServers tservers = new TServers();
+    tservers.lookupConsumer = (src, row) -> lookups.add(src.getExtent());
+
+    ClientTabletCacheImpl metaCache = createLocators(tservers, "tserver1", 
"tserver2", "foo");
+
+    var ke1 = createNewKeyExtent("foo", "g", null);
+    var ke2 = createNewKeyExtent("foo", "m", "g");
+    var ke3 = createNewKeyExtent("foo", "r", "m");
+    var ke4 = createNewKeyExtent("foo", null, "r");
+
+    setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, ke1, null, null);
+    setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, ke2, null, null);
+    setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, ke3, null, null);
+    setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, ke4, null, null);
+
+    List<Mutation> ml = new ArrayList<>();
+    for (char c = 'a'; c <= 'z'; c++) {
+      ml.add(createNewMutation("" + c, "cf1:cq1=v3", "cf1:cq2=v4"));
+    }
+    Map<String,TabletServerMutations<Mutation>> binnedMutations = new 
HashMap<>();
+    List<Mutation> afailures = new ArrayList<>();
+    assertEquals(List.of(), lookups);
+    metaCache.binMutations(context, ml, binnedMutations, afailures);
+    assertTrue(binnedMutations.isEmpty());
+    assertEquals(ml.size(), afailures.size());
+    // since all tablets are in the same metadata table, should only see one 
lookup to the metadata
+    // table
+    assertEquals(List.of(ROOT_TABLE_EXTENT, METADATA_TABLE_EXTENT), lookups);
+
+    // since tablets had no locations, attempting to binMutations again should 
go back to the
+    // metadata table once
+    lookups.clear();
+    binnedMutations.clear();
+    afailures.clear();
+    metaCache.binMutations(context, ml, binnedMutations, afailures);
+    assertTrue(binnedMutations.isEmpty());
+    assertEquals(ml.size(), afailures.size());
+    assertEquals(List.of(METADATA_TABLE_EXTENT), lookups);
+
+    // test binning ranges
+    var range1 = createNewRange("a", "c");
+    var range2 = createNewRange("h", "o");
+    var range3 = createNewRange("x", "z");
+    var ranges = List.of(range1, range2, range3);
+
+    // binning ranges should do a single metadata lookup since location is 
required and what is
+    // currently cached has no location
+    BiConsumer<CachedTablet,Range> rangeConsumer =
+        (t, r) -> fail("Tablet have no locations, so should not bin");
+    lookups.clear();
+    var failures = metaCache.findTablets(context, ranges, rangeConsumer, 
LocationNeed.REQUIRED);
+    assertEquals(3, failures.size());
+    assertEquals(List.of(METADATA_TABLE_EXTENT), lookups);
+
+    // should do a single metadata lookup since the cached entries have no 
location
+    lookups.clear();
+    failures = metaCache.findTablets(context, ranges, rangeConsumer, 
LocationNeed.REQUIRED);
+    assertEquals(3, failures.size());
+    assertEquals(List.of(METADATA_TABLE_EXTENT), lookups);
+
+    // since location is not required and the currently cached entries have no 
locations the
+    // following should not do metadata lookups
+    Map<KeyExtent,List<Range>> seen = new HashMap<>();
+    rangeConsumer = (t, r) -> {
+      seen.computeIfAbsent(t.getExtent(), k -> new ArrayList<>()).add(r);
+      assertTrue(t.getTserverLocation().isEmpty());
+    };
+    lookups.clear();
+    failures = metaCache.findTablets(context, ranges, rangeConsumer, 
LocationNeed.NOT_REQUIRED);
+    assertEquals(List.of(), lookups);
+    assertEquals(Map.of(ke1, List.of(range1), ke2, List.of(range2), ke3, 
List.of(range2), ke4,
+        List.of(range3)), seen);
+    assertEquals(0, failures.size());
+
+    // set locations
+    setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, ke1, "T1", "I1");
+    setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, ke2, "T2", "I2");
+    setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, ke3, "T3", "I3");
+    setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, ke4, "T3", "I3");
+
+    // now that locations are set, all mutations should bin.. should only do 
one metadata lookup to
+    // get the locations
+    lookups.clear();
+    binnedMutations.clear();
+    afailures.clear();
+    assertEquals(List.of(), lookups);
+    metaCache.binMutations(context, ml, binnedMutations, afailures);
+    assertEquals(Set.of("T1", "T2", "T3"), binnedMutations.keySet());
+    assertEquals(7, binnedMutations.get("T1").getMutations().get(ke1).size());
+    assertEquals(6, binnedMutations.get("T2").getMutations().get(ke2).size());
+    assertEquals(5, binnedMutations.get("T3").getMutations().get(ke3).size());
+    assertEquals(8, binnedMutations.get("T3").getMutations().get(ke4).size());
+    assertEquals(0, afailures.size());
+    assertEquals(List.of(METADATA_TABLE_EXTENT), lookups);
+
+    // binning mutations again should do zero metadata lookups
+    lookups.clear();
+    binnedMutations.clear();
+    afailures.clear();
+    metaCache.binMutations(context, ml, binnedMutations, afailures);
+    assertEquals(Set.of("T1", "T2", "T3"), binnedMutations.keySet());
+    assertEquals(7, binnedMutations.get("T1").getMutations().get(ke1).size());
+    assertEquals(6, binnedMutations.get("T2").getMutations().get(ke2).size());
+    assertEquals(5, binnedMutations.get("T3").getMutations().get(ke3).size());
+    assertEquals(8, binnedMutations.get("T3").getMutations().get(ke4).size());
+    assertEquals(0, afailures.size());
+    assertEquals(List.of(), lookups);
+
+    // since the cached entries have locations there should be no metadata 
lookups when binning
+    // ranges
+    seen.clear();
+    rangeConsumer = (t, r) -> {
+      seen.computeIfAbsent(t.getExtent(), k -> new ArrayList<>()).add(r);
+      assertTrue(t.getTserverLocation().isPresent());
+    };
+    lookups.clear();
+    failures = metaCache.findTablets(context, ranges, rangeConsumer, 
LocationNeed.REQUIRED);
+    assertEquals(List.of(), lookups);
+    assertEquals(Map.of(ke1, List.of(range1), ke2, List.of(range2), ke3, 
List.of(range2), ke4,
+        List.of(range3)), seen);
+    assertEquals(0, failures.size());
+  }
 }

Reply via email to