This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new c88d368b2d Fixes race condition that caused unneeded user compactions 
to run (#4554)
c88d368b2d is described below

commit c88d368b2d3eb6384d42739c5322148bf9bcb219
Author: Keith Turner <ktur...@apache.org>
AuthorDate: Mon May 13 17:04:19 2024 -0400

    Fixes race condition that caused unneeded user compactions to run (#4554)
    
    The following is an example of the problem this change fixes
    
     1. Thread 1: A user compaction is currently running for a tablet
     2. Thread 2: Tablet server receives a compaction request RPC from manager 
and it
        checks to see if the compaction is needed for the same tablet.  If 
finds it is needed.
     3. Thread 1: completes user compaction, so a compaction is no longer
        needed for the tablet
     4. Thread 2: Initiates a user compaction of the tablet because its
        check in step 2 passed.
    
    This change adds a second check in step 4 above that should prevent this 
race
    condition because the check is done at a point when its known no
    concurrent user compaction is running.  The original check was left as a
    fail fast check, but a comment was added explaining its not sufficient
    for correctness.
---
 .../accumulo/tserver/tablet/CompactableImpl.java   | 30 +++++++++--
 .../org/apache/accumulo/tserver/tablet/Tablet.java |  8 +++
 .../tablet/CompactableImplFileManagerTest.java     | 63 ++++++++++++++++++----
 3 files changed, 88 insertions(+), 13 deletions(-)

diff --git 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java
 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java
index fc84c038aa..37b23dc6c4 100644
--- 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java
+++ 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java
@@ -247,13 +247,32 @@ public class CompactableImpl implements Compactable {
 
     protected abstract long getNanoTime();
 
-    boolean initiateSelection(CompactionKind kind) {
+    /**
+     * @return the last id of the last successful user compaction
+     */
+    protected abstract long getLastCompactId();
+
+    boolean initiateSelection(CompactionKind kind, Long compactionId) {
 
-      Preconditions.checkArgument(kind == CompactionKind.SELECTOR || kind == 
CompactionKind.USER);
+      Preconditions.checkArgument(
+          kind == CompactionKind.SELECTOR && compactionId == null
+              || kind == CompactionKind.USER && compactionId != null,
+          "Unexpected kind and/or compaction id: %s %s", kind, compactionId);
 
       if (selectStatus == FileSelectionStatus.NOT_ACTIVE || (kind == 
CompactionKind.USER
           && selectKind == CompactionKind.SELECTOR && 
noneRunning(CompactionKind.SELECTOR)
           && selectStatus != FileSelectionStatus.SELECTING)) {
+
+        // Check compaction id when a lock is held and no other user 
compactions have files
+        // selected, at this point the results of any previous user 
compactions should be seen. If
+        // user compaction is currently running, then will not get this far 
because of the checks a
+        // few lines up.
+        if (kind == CompactionKind.USER && getLastCompactId() >= compactionId) 
{
+          // This user compaction has already completed, so no need to 
initiate selection of files
+          // for user compaction.
+          return false;
+        }
+
         selectStatus = FileSelectionStatus.NEW;
         selectKind = kind;
         selectedFiles.clear();
@@ -728,6 +747,11 @@ public class CompactableImpl implements Compactable {
       protected long getNanoTime() {
         return System.nanoTime();
       }
+
+      @Override
+      protected long getLastCompactId() {
+        return tablet.getLastCompactId();
+      }
     };
   }
 
@@ -1030,7 +1054,7 @@ public class CompactableImpl implements Compactable {
         return;
       }
 
-      if (fileMgr.initiateSelection(kind)) {
+      if (fileMgr.initiateSelection(kind, compactionId)) {
         this.chelper = localHelper;
         this.compactionId = compactionId;
         this.compactionConfig = compactionConfig;
diff --git 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
index 6a646c4f57..1fac59303c 100644
--- 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
+++ 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
@@ -174,6 +174,10 @@ public class Tablet extends TabletBase {
   private AtomicLong lastFlushID = new AtomicLong(-1);
   private AtomicLong lastCompactID = new AtomicLong(-1);
 
+  public long getLastCompactId() {
+    return lastCompactID.get();
+  }
+
   private static class CompactionWaitInfo {
     long flushID = -1;
     long compactionID = -1;
@@ -2047,6 +2051,10 @@ public class Tablet extends TabletBase {
   public void compactAll(long compactionId, CompactionConfig compactionConfig) 
{
 
     synchronized (this) {
+      // This check will quickly ignore stale request from the manager, 
however its not sufficient
+      // for correctness. This same check is done again later at a point when 
no compactions could
+      // be running concurrently to avoid race conditions. If the check passes 
here, it possible a
+      // concurrent compaction could change lastCompactID after the check 
succeeds.
       if (lastCompactID.get() >= compactionId) {
         return;
       }
diff --git 
a/server/tserver/src/test/java/org/apache/accumulo/tserver/tablet/CompactableImplFileManagerTest.java
 
b/server/tserver/src/test/java/org/apache/accumulo/tserver/tablet/CompactableImplFileManagerTest.java
index 8214823be3..743ac8feca 100644
--- 
a/server/tserver/src/test/java/org/apache/accumulo/tserver/tablet/CompactableImplFileManagerTest.java
+++ 
b/server/tserver/src/test/java/org/apache/accumulo/tserver/tablet/CompactableImplFileManagerTest.java
@@ -30,8 +30,10 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.time.Duration;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Optional;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -118,7 +120,7 @@ public class CompactableImplFileManagerTest {
         fileMgr.getCandidates(tabletFiles, SYSTEM, false));
     assertNoCandidates(fileMgr, tabletFiles, CHOP, USER, SELECTOR);
 
-    assertTrue(fileMgr.initiateSelection(USER));
+    assertTrue(fileMgr.initiateSelection(USER, 1L));
 
     assertFalse(fileMgr.reserveFiles(staleJob));
 
@@ -191,7 +193,7 @@ public class CompactableImplFileManagerTest {
     TestFileManager fileMgr = new TestFileManager();
     var tabletFiles = newFiles("F00000.rf", "F00001.rf", "F00002.rf", 
"F00003.rf");
 
-    assertTrue(fileMgr.initiateSelection(USER));
+    assertTrue(fileMgr.initiateSelection(USER, 1L));
     assertTrue(fileMgr.beginSelection());
     fileMgr.finishSelection(newFiles("F00000.rf", "F00001.rf", "F00002.rf", 
"F00003.rf"), false);
     assertEquals(FileSelectionStatus.SELECTED, fileMgr.getSelectionStatus());
@@ -233,7 +235,7 @@ public class CompactableImplFileManagerTest {
   public void testSelectionExpirationDisjoint() {
     TestFileManager fileMgr = new TestFileManager();
 
-    assertTrue(fileMgr.initiateSelection(USER));
+    assertTrue(fileMgr.initiateSelection(USER, 1L));
     assertTrue(fileMgr.beginSelection());
     fileMgr.finishSelection(newFiles("F00000.rf", "F00001.rf"), false);
     assertEquals(FileSelectionStatus.SELECTED, fileMgr.getSelectionStatus());
@@ -268,7 +270,7 @@ public class CompactableImplFileManagerTest {
     var job1 = newJob(SYSTEM, "F00000.rf", "F00001.rf");
     assertTrue(fileMgr.reserveFiles(job1));
 
-    assertTrue(fileMgr.initiateSelection(USER));
+    assertTrue(fileMgr.initiateSelection(USER, 1L));
 
     // selection was initiated, so a new system compaction should not be able 
to start
     assertFalse(fileMgr.reserveFiles(newJob(SYSTEM, "F00002.rf", 
"F00003.rf")));
@@ -292,11 +294,11 @@ public class CompactableImplFileManagerTest {
   public void testUserCompactionPreemptsSelectorCompaction() {
     TestFileManager fileMgr = new TestFileManager();
 
-    assertTrue(fileMgr.initiateSelection(SELECTOR));
+    assertTrue(fileMgr.initiateSelection(SELECTOR, null));
     assertEquals(SELECTOR, fileMgr.getSelectionKind());
     assertTrue(fileMgr.beginSelection());
     // USER compaction should not be able to preempt while in the middle of 
selecting files
-    assertFalse(fileMgr.initiateSelection(USER));
+    assertFalse(fileMgr.initiateSelection(USER, 1L));
     assertEquals(SELECTOR, fileMgr.getSelectionKind());
     fileMgr.finishSelection(newFiles("F00000.rf", "F00001.rf", "F00002.rf"), 
false);
     // check state is as expected after finishing selection
@@ -306,7 +308,7 @@ public class CompactableImplFileManagerTest {
 
     // USER compaction should not be able to preempt when there are running 
compactions.
     fileMgr.running.add(SELECTOR);
-    assertFalse(fileMgr.initiateSelection(USER));
+    assertFalse(fileMgr.initiateSelection(USER, 1L));
     // check state is as expected
     assertEquals(SELECTOR, fileMgr.getSelectionKind());
     assertEquals(FileSelectionStatus.SELECTED, fileMgr.getSelectionStatus());
@@ -315,7 +317,7 @@ public class CompactableImplFileManagerTest {
     // after file selection is complete and there are no running compactions, 
should be able to
     // preempt
     fileMgr.running.clear();
-    assertTrue(fileMgr.initiateSelection(USER));
+    assertTrue(fileMgr.initiateSelection(USER, 1L));
     // check that things were properly reset
     assertEquals(USER, fileMgr.getSelectionKind());
     assertEquals(FileSelectionStatus.NEW, fileMgr.getSelectionStatus());
@@ -327,7 +329,7 @@ public class CompactableImplFileManagerTest {
     TestFileManager fileMgr = new TestFileManager();
     var tabletFiles = newFiles("F00000.rf", "F00001.rf", "F00002.rf", 
"F00003.rf", "F00004.rf");
 
-    assertTrue(fileMgr.initiateSelection(USER));
+    assertTrue(fileMgr.initiateSelection(USER, 1L));
     assertTrue(fileMgr.beginSelection());
     fileMgr.finishSelection(
         newFiles("F00000.rf", "F00001.rf", "F00002.rf", "F00003.rf", 
"F00004.rf"), false);
@@ -337,7 +339,7 @@ public class CompactableImplFileManagerTest {
     fileMgr.userCompactionCanceled();
     assertEquals(FileSelectionStatus.NOT_ACTIVE, fileMgr.getSelectionStatus());
 
-    assertTrue(fileMgr.initiateSelection(USER));
+    assertTrue(fileMgr.initiateSelection(USER, 1L));
     assertTrue(fileMgr.beginSelection());
     fileMgr.finishSelection(
         newFiles("F00000.rf", "F00001.rf", "F00002.rf", "F00003.rf", 
"F00004.rf"), false);
@@ -433,6 +435,41 @@ public class CompactableImplFileManagerTest {
 
   }
 
+  @Test
+  public void testComletedUserCompaction() {
+    TestFileManager fileMgr = new TestFileManager();
+
+    fileMgr.lastCompactId.set(2);
+    // should fail to initiate because the last compact id is equal
+    assertFalse(fileMgr.initiateSelection(USER, 2L));
+    assertEquals(FileSelectionStatus.NOT_ACTIVE, fileMgr.selectStatus);
+
+    fileMgr.lastCompactId.set(3);
+    // should fail to initiate because the last compact id is greater than
+    assertFalse(fileMgr.initiateSelection(USER, 2L));
+    assertEquals(FileSelectionStatus.NOT_ACTIVE, fileMgr.selectStatus);
+
+    fileMgr.lastCompactId.set(1);
+    assertTrue(fileMgr.initiateSelection(USER, 2L));
+    assertEquals(FileSelectionStatus.NEW, fileMgr.selectStatus);
+
+    assertTrue(fileMgr.beginSelection());
+    fileMgr.finishSelection(
+        newFiles("F00000.rf", "F00001.rf", "F00002.rf", "F00003.rf", 
"F00004.rf"), false);
+    assertEquals(FileSelectionStatus.SELECTED, fileMgr.getSelectionStatus());
+  }
+
+  @Test
+  public void testIllegalInitiateArgs() {
+    TestFileManager fileMgr = new TestFileManager();
+    assertThrows(IllegalArgumentException.class, () -> 
fileMgr.initiateSelection(USER, null));
+    assertThrows(IllegalArgumentException.class, () -> 
fileMgr.initiateSelection(SELECTOR, 2L));
+    for (var kind : List.of(SYSTEM, CHOP)) {
+      assertThrows(IllegalArgumentException.class, () -> 
fileMgr.initiateSelection(kind, 2L));
+      assertThrows(IllegalArgumentException.class, () -> 
fileMgr.initiateSelection(kind, null));
+    }
+  }
+
   private void assertNoCandidates(TestFileManager fileMgr, 
Set<StoredTabletFile> tabletFiles,
       CompactionKind... kinds) {
     for (CompactionKind kind : kinds) {
@@ -446,6 +483,7 @@ public class CompactableImplFileManagerTest {
     public static final Duration SELECTION_EXPIRATION = Duration.ofMinutes(2);
     private long time = 0;
     public Set<CompactionKind> running = new HashSet<>();
+    public AtomicLong lastCompactId = new AtomicLong(0);
 
     public TestFileManager() {
       super(new KeyExtent(TableId.of("1"), null, null), Set.of(), 
Optional.empty(),
@@ -470,6 +508,11 @@ public class CompactableImplFileManagerTest {
       return time;
     }
 
+    @Override
+    protected long getLastCompactId() {
+      return lastCompactId.get();
+    }
+
     void setNanoTime(long t) {
       time = t;
     }

Reply via email to