This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push: new c88d368b2d Fixes race condition that caused unneeded user compactions to run (#4554) c88d368b2d is described below commit c88d368b2d3eb6384d42739c5322148bf9bcb219 Author: Keith Turner <ktur...@apache.org> AuthorDate: Mon May 13 17:04:19 2024 -0400 Fixes race condition that caused unneeded user compactions to run (#4554) The following is an example of the problem this change fixes 1. Thread 1: A user compaction is currently running for a tablet 2. Thread 2: Tablet server receives a compaction request RPC from manager and it checks to see if the compaction is needed for the same tablet. If finds it is needed. 3. Thread 1: completes user compaction, so a compaction is no longer needed for the tablet 4. Thread 2: Initiates a user compaction of the tablet because its check in step 2 passed. This change adds a second check in step 4 above that should prevent this race condition because the check is done at a point when its known no concurrent user compaction is running. The original check was left as a fail fast check, but a comment was added explaining its not sufficient for correctness. --- .../accumulo/tserver/tablet/CompactableImpl.java | 30 +++++++++-- .../org/apache/accumulo/tserver/tablet/Tablet.java | 8 +++ .../tablet/CompactableImplFileManagerTest.java | 63 ++++++++++++++++++---- 3 files changed, 88 insertions(+), 13 deletions(-) diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java index fc84c038aa..37b23dc6c4 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/CompactableImpl.java @@ -247,13 +247,32 @@ public class CompactableImpl implements Compactable { protected abstract long getNanoTime(); - boolean initiateSelection(CompactionKind kind) { + /** + * @return the last id of the last successful user compaction + */ + protected abstract long getLastCompactId(); + + boolean initiateSelection(CompactionKind kind, Long compactionId) { - Preconditions.checkArgument(kind == CompactionKind.SELECTOR || kind == CompactionKind.USER); + Preconditions.checkArgument( + kind == CompactionKind.SELECTOR && compactionId == null + || kind == CompactionKind.USER && compactionId != null, + "Unexpected kind and/or compaction id: %s %s", kind, compactionId); if (selectStatus == FileSelectionStatus.NOT_ACTIVE || (kind == CompactionKind.USER && selectKind == CompactionKind.SELECTOR && noneRunning(CompactionKind.SELECTOR) && selectStatus != FileSelectionStatus.SELECTING)) { + + // Check compaction id when a lock is held and no other user compactions have files + // selected, at this point the results of any previous user compactions should be seen. If + // user compaction is currently running, then will not get this far because of the checks a + // few lines up. + if (kind == CompactionKind.USER && getLastCompactId() >= compactionId) { + // This user compaction has already completed, so no need to initiate selection of files + // for user compaction. + return false; + } + selectStatus = FileSelectionStatus.NEW; selectKind = kind; selectedFiles.clear(); @@ -728,6 +747,11 @@ public class CompactableImpl implements Compactable { protected long getNanoTime() { return System.nanoTime(); } + + @Override + protected long getLastCompactId() { + return tablet.getLastCompactId(); + } }; } @@ -1030,7 +1054,7 @@ public class CompactableImpl implements Compactable { return; } - if (fileMgr.initiateSelection(kind)) { + if (fileMgr.initiateSelection(kind, compactionId)) { this.chelper = localHelper; this.compactionId = compactionId; this.compactionConfig = compactionConfig; diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java index 6a646c4f57..1fac59303c 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java @@ -174,6 +174,10 @@ public class Tablet extends TabletBase { private AtomicLong lastFlushID = new AtomicLong(-1); private AtomicLong lastCompactID = new AtomicLong(-1); + public long getLastCompactId() { + return lastCompactID.get(); + } + private static class CompactionWaitInfo { long flushID = -1; long compactionID = -1; @@ -2047,6 +2051,10 @@ public class Tablet extends TabletBase { public void compactAll(long compactionId, CompactionConfig compactionConfig) { synchronized (this) { + // This check will quickly ignore stale request from the manager, however its not sufficient + // for correctness. This same check is done again later at a point when no compactions could + // be running concurrently to avoid race conditions. If the check passes here, it possible a + // concurrent compaction could change lastCompactID after the check succeeds. if (lastCompactID.get() >= compactionId) { return; } diff --git a/server/tserver/src/test/java/org/apache/accumulo/tserver/tablet/CompactableImplFileManagerTest.java b/server/tserver/src/test/java/org/apache/accumulo/tserver/tablet/CompactableImplFileManagerTest.java index 8214823be3..743ac8feca 100644 --- a/server/tserver/src/test/java/org/apache/accumulo/tserver/tablet/CompactableImplFileManagerTest.java +++ b/server/tserver/src/test/java/org/apache/accumulo/tserver/tablet/CompactableImplFileManagerTest.java @@ -30,8 +30,10 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.time.Duration; import java.util.HashSet; +import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -118,7 +120,7 @@ public class CompactableImplFileManagerTest { fileMgr.getCandidates(tabletFiles, SYSTEM, false)); assertNoCandidates(fileMgr, tabletFiles, CHOP, USER, SELECTOR); - assertTrue(fileMgr.initiateSelection(USER)); + assertTrue(fileMgr.initiateSelection(USER, 1L)); assertFalse(fileMgr.reserveFiles(staleJob)); @@ -191,7 +193,7 @@ public class CompactableImplFileManagerTest { TestFileManager fileMgr = new TestFileManager(); var tabletFiles = newFiles("F00000.rf", "F00001.rf", "F00002.rf", "F00003.rf"); - assertTrue(fileMgr.initiateSelection(USER)); + assertTrue(fileMgr.initiateSelection(USER, 1L)); assertTrue(fileMgr.beginSelection()); fileMgr.finishSelection(newFiles("F00000.rf", "F00001.rf", "F00002.rf", "F00003.rf"), false); assertEquals(FileSelectionStatus.SELECTED, fileMgr.getSelectionStatus()); @@ -233,7 +235,7 @@ public class CompactableImplFileManagerTest { public void testSelectionExpirationDisjoint() { TestFileManager fileMgr = new TestFileManager(); - assertTrue(fileMgr.initiateSelection(USER)); + assertTrue(fileMgr.initiateSelection(USER, 1L)); assertTrue(fileMgr.beginSelection()); fileMgr.finishSelection(newFiles("F00000.rf", "F00001.rf"), false); assertEquals(FileSelectionStatus.SELECTED, fileMgr.getSelectionStatus()); @@ -268,7 +270,7 @@ public class CompactableImplFileManagerTest { var job1 = newJob(SYSTEM, "F00000.rf", "F00001.rf"); assertTrue(fileMgr.reserveFiles(job1)); - assertTrue(fileMgr.initiateSelection(USER)); + assertTrue(fileMgr.initiateSelection(USER, 1L)); // selection was initiated, so a new system compaction should not be able to start assertFalse(fileMgr.reserveFiles(newJob(SYSTEM, "F00002.rf", "F00003.rf"))); @@ -292,11 +294,11 @@ public class CompactableImplFileManagerTest { public void testUserCompactionPreemptsSelectorCompaction() { TestFileManager fileMgr = new TestFileManager(); - assertTrue(fileMgr.initiateSelection(SELECTOR)); + assertTrue(fileMgr.initiateSelection(SELECTOR, null)); assertEquals(SELECTOR, fileMgr.getSelectionKind()); assertTrue(fileMgr.beginSelection()); // USER compaction should not be able to preempt while in the middle of selecting files - assertFalse(fileMgr.initiateSelection(USER)); + assertFalse(fileMgr.initiateSelection(USER, 1L)); assertEquals(SELECTOR, fileMgr.getSelectionKind()); fileMgr.finishSelection(newFiles("F00000.rf", "F00001.rf", "F00002.rf"), false); // check state is as expected after finishing selection @@ -306,7 +308,7 @@ public class CompactableImplFileManagerTest { // USER compaction should not be able to preempt when there are running compactions. fileMgr.running.add(SELECTOR); - assertFalse(fileMgr.initiateSelection(USER)); + assertFalse(fileMgr.initiateSelection(USER, 1L)); // check state is as expected assertEquals(SELECTOR, fileMgr.getSelectionKind()); assertEquals(FileSelectionStatus.SELECTED, fileMgr.getSelectionStatus()); @@ -315,7 +317,7 @@ public class CompactableImplFileManagerTest { // after file selection is complete and there are no running compactions, should be able to // preempt fileMgr.running.clear(); - assertTrue(fileMgr.initiateSelection(USER)); + assertTrue(fileMgr.initiateSelection(USER, 1L)); // check that things were properly reset assertEquals(USER, fileMgr.getSelectionKind()); assertEquals(FileSelectionStatus.NEW, fileMgr.getSelectionStatus()); @@ -327,7 +329,7 @@ public class CompactableImplFileManagerTest { TestFileManager fileMgr = new TestFileManager(); var tabletFiles = newFiles("F00000.rf", "F00001.rf", "F00002.rf", "F00003.rf", "F00004.rf"); - assertTrue(fileMgr.initiateSelection(USER)); + assertTrue(fileMgr.initiateSelection(USER, 1L)); assertTrue(fileMgr.beginSelection()); fileMgr.finishSelection( newFiles("F00000.rf", "F00001.rf", "F00002.rf", "F00003.rf", "F00004.rf"), false); @@ -337,7 +339,7 @@ public class CompactableImplFileManagerTest { fileMgr.userCompactionCanceled(); assertEquals(FileSelectionStatus.NOT_ACTIVE, fileMgr.getSelectionStatus()); - assertTrue(fileMgr.initiateSelection(USER)); + assertTrue(fileMgr.initiateSelection(USER, 1L)); assertTrue(fileMgr.beginSelection()); fileMgr.finishSelection( newFiles("F00000.rf", "F00001.rf", "F00002.rf", "F00003.rf", "F00004.rf"), false); @@ -433,6 +435,41 @@ public class CompactableImplFileManagerTest { } + @Test + public void testComletedUserCompaction() { + TestFileManager fileMgr = new TestFileManager(); + + fileMgr.lastCompactId.set(2); + // should fail to initiate because the last compact id is equal + assertFalse(fileMgr.initiateSelection(USER, 2L)); + assertEquals(FileSelectionStatus.NOT_ACTIVE, fileMgr.selectStatus); + + fileMgr.lastCompactId.set(3); + // should fail to initiate because the last compact id is greater than + assertFalse(fileMgr.initiateSelection(USER, 2L)); + assertEquals(FileSelectionStatus.NOT_ACTIVE, fileMgr.selectStatus); + + fileMgr.lastCompactId.set(1); + assertTrue(fileMgr.initiateSelection(USER, 2L)); + assertEquals(FileSelectionStatus.NEW, fileMgr.selectStatus); + + assertTrue(fileMgr.beginSelection()); + fileMgr.finishSelection( + newFiles("F00000.rf", "F00001.rf", "F00002.rf", "F00003.rf", "F00004.rf"), false); + assertEquals(FileSelectionStatus.SELECTED, fileMgr.getSelectionStatus()); + } + + @Test + public void testIllegalInitiateArgs() { + TestFileManager fileMgr = new TestFileManager(); + assertThrows(IllegalArgumentException.class, () -> fileMgr.initiateSelection(USER, null)); + assertThrows(IllegalArgumentException.class, () -> fileMgr.initiateSelection(SELECTOR, 2L)); + for (var kind : List.of(SYSTEM, CHOP)) { + assertThrows(IllegalArgumentException.class, () -> fileMgr.initiateSelection(kind, 2L)); + assertThrows(IllegalArgumentException.class, () -> fileMgr.initiateSelection(kind, null)); + } + } + private void assertNoCandidates(TestFileManager fileMgr, Set<StoredTabletFile> tabletFiles, CompactionKind... kinds) { for (CompactionKind kind : kinds) { @@ -446,6 +483,7 @@ public class CompactableImplFileManagerTest { public static final Duration SELECTION_EXPIRATION = Duration.ofMinutes(2); private long time = 0; public Set<CompactionKind> running = new HashSet<>(); + public AtomicLong lastCompactId = new AtomicLong(0); public TestFileManager() { super(new KeyExtent(TableId.of("1"), null, null), Set.of(), Optional.empty(), @@ -470,6 +508,11 @@ public class CompactableImplFileManagerTest { return time; } + @Override + protected long getLastCompactId() { + return lastCompactId.get(); + } + void setNanoTime(long t) { time = t; }