This is an automated email from the ASF dual-hosted git repository.
kturner pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push:
new e4f4727e73 Considers projected compactions when searching for files to compact (#5675)
e4f4727e73 is described below
commit e4f4727e735ce90a4e72999006c38e92d8b82cdb
Author: Keith Turner <[email protected]>
AuthorDate: Thu Aug 7 16:20:54 2025 -0400
Considers projected compactions when searching for files to compact (#5675)
When attempting to merge #5588 to main, a bug was found with the change.
The bug was that the code did not consider transitive compactions when
searching past currently running compactions. This bug would not cause
any errors; it was just suboptimal. After these changes, no compactions
that would transitively include any smaller files will be planned.
For a tablet that has 6 1M files compacting, 5 7M files, and 4 40M files,
the code without this fix would find the 4 40M files to compact.
However, if we wait for the compaction of the 5 7M files (which is
waiting on the compaction of the 6 1M files), then the 4 40M files would
be compacted with the 41M file that would be produced. These changes
would wait for that, making the three compactions run sequentially so
they can include each other's output.
A second test added in this change gives example cases of compactions
that can be found when there are running compactions.
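As a worked illustration of the idea, here is a minimal sketch with
made-up names (LookAheadSketch, DataFile, findGroup); the real logic
lives in DefaultCompactionPlanner.findDataFilesToCompact. The projected
output of a running compaction is added to the candidate set, and a plan
is deferred when the best candidate group would include that projected
file:

import java.util.Comparator;
import java.util.List;

public class LookAheadSketch {

  record DataFile(String name, long sizeMB, boolean projected) {}

  // Size-tiered ratio test: a candidate group qualifies when the largest
  // file times the ratio is at most the total size of the group.
  static List<DataFile> findGroup(List<DataFile> files, double ratio) {
    var sorted = files.stream()
        .sorted(Comparator.comparingLong(DataFile::sizeMB)).toList();
    for (int end = sorted.size(); end >= 2; end--) {
      var group = sorted.subList(0, end);
      long total = group.stream().mapToLong(DataFile::sizeMB).sum();
      if (group.get(end - 1).sizeMB() * ratio <= total) {
        return group;
      }
    }
    return List.of();
  }

  public static void main(String[] args) {
    // The example above: six 1M files are already compacting, so their
    // projected 6M output joins the five 7M and four 40M candidate files.
    var candidates = List.of(new DataFile("projected", 6, true),
        new DataFile("f1", 7, false), new DataFile("f2", 7, false),
        new DataFile("f3", 7, false), new DataFile("f4", 7, false),
        new DataFile("f5", 7, false), new DataFile("g1", 40, false),
        new DataFile("g2", 40, false), new DataFile("g3", 40, false),
        new DataFile("g4", 40, false));

    var group = findGroup(candidates, 2.0);
    if (group.stream().anyMatch(DataFile::projected)) {
      // The group would include a file that does not exist yet, so it is
      // better to wait and let the compactions run sequentially.
      System.out.println("no plan yet; wait for the running compaction");
    } else {
      System.out.println("compact " + group);
    }
  }
}

Running sequentially means the 4 40M files are rewritten once (together
with the 41M output) instead of twice, which is the optimality the fix
is after.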
---
.../spi/compaction/DefaultCompactionPlanner.java | 6 ++
.../compaction/DefaultCompactionPlannerTest.java | 94 +++++++++++++++++++---
2 files changed, 90 insertions(+), 10 deletions(-)
diff --git a/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java b/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java
index 38af298b6e..99bfd8472c 100644
--- a/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java
+++ b/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java
@@ -324,6 +324,12 @@ public class DefaultCompactionPlanner implements CompactionPlanner {
var futureFile = getExpectedFile(group, nextExpected);
Preconditions.checkState(expectedFiles.add(futureFile), "Unexpected duplicate %s in %s",
futureFile, expectedFiles);
+ // Include this expected file in the set of files used for planning future compactions.
+ // This will cause any compaction that would include this file to be ignored. If a
+ // compaction would include this file, then it is best if the compactions run sequentially.
+ Preconditions.checkState(filesCopy.add(futureFile), "Unexpected duplicate %s in %s",
+ futureFile, filesCopy);
// look for any compaction work in the remaining set of files
group = findDataFilesToCompact(filesCopy, params.getRatio(), maxFilesToCompact,
maxSizeToCompact);
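For context, the projected size of an expected file is presumably the
sum of the running compaction's input sizes (ten files of estimated
size 11 projecting to one file of size 110 in the test below). A hedged
sketch of that derivation, with a hypothetical helper rather than the
actual getExpectedFile:

import java.util.Collection;
import org.apache.accumulo.core.client.admin.compaction.CompactableFile;

class ProjectedSizeSketch {
  // Sum the estimated sizes of a running compaction's inputs to get the
  // size of the file it is expected to produce.
  static long projectedSize(Collection<CompactableFile> inputs) {
    return inputs.stream().mapToLong(CompactableFile::getEstimatedSize).sum();
  }
}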
diff --git a/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java b/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java
index 04bd7baa09..45d6d09373 100644
--- a/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java
@@ -222,11 +222,72 @@ public class DefaultCompactionPlannerTest {
var params = createPlanningParams(all, candidates, jobs, 2, CompactionKind.SYSTEM);
var plan = planner.makePlan(params);
- // the size 100 files should be excluded because the job running over size 10 files will produce
- // a file in their size range, so should see the 1000 size files planned for compaction
+ // The compaction running over the size 10 files would produce a file that would be used by a
+ // compaction over the size 100 files. A compaction over the size 100 files would produce a file
+ // that would be used by a compaction over the size 1000 files. This should continue up the
+ // chain, disqualifying all sets of files for compaction.
+ assertEquals(List.of(), plan.getJobs());
+ }
+
+ @Test
+ public void testRunningCompactionLookAhead2() {
+ var aconf = SiteConfiguration.empty()
+ .withOverrides(Map.of(
+ Property.TSERV_COMPACTION_SERVICE_PREFIX.getKey() + "cs1.planner.opts.maxOpen", "10"))
+ .build();
+ ConfigurationImpl config = new ConfigurationImpl(aconf);
+
+ String executors = "[{'name':'small','type': 'internal','maxSize':'32M','numThreads':1},"
+ + "{'name':'medium','type': 'internal','maxSize':'128M','numThreads':2},"
+ + "{'name':'large','type': 'internal','maxSize':'512M','numThreads':3},"
+ + "{'name':'huge','type': 'internal','numThreads':4}]";
+
+ var planner = createPlanner(config, executors);
+
+ int count = 0;
+
+ // create 10 files of size 11 as compacting
+ List<String> compactingString = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ compactingString.add("F" + count++);
+ compactingString.add(11 + "");
+ }
+
+ // create 10 files of size 11 as the tablet's files
+ List<String> candidateStrings = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ candidateStrings.add("F" + count++);
+ candidateStrings.add(11 + "");
+ }
+
+ // create 17 files each of sizes 100, 1000, and 10_000 as the tablet's files
+ for (int size = 100; size < 100_000; size *= 10) {
+ for (int i = 0; i < 17; i++) {
+ candidateStrings.add("F" + count++);
+ candidateStrings.add(size + "");
+ }
+ }
+
+ // create 5 files of size 100_000 as the tablet's files
+ for (int i = 0; i < 5; i++) {
+ candidateStrings.add("F" + count++);
+ candidateStrings.add(100_000 + "");
+ }
+
+ var compacting = createCFs(compactingString.toArray(new String[0]));
+ var candidates = createCFs(candidateStrings.toArray(new String[0]));
+ var all = Sets.union(compacting, candidates);
+ var jobs = Set.of(createJob(CompactionKind.SYSTEM, all, compacting));
+ var params = createPlanningParams(all, candidates, jobs, 2, CompactionKind.SYSTEM);
+ var plan = planner.makePlan(params);
+
+ // There are currently 20 files of size 11, of which 10 are compacting. The 10 files that are
+ // compacting would produce a file with a projected size of 110. The file with a projected size
+ // of 110 would not be included in a compaction of the 10 files of size 11, therefore it's ok to
+ // compact the 10 files of size 11 and they should be found.
var job = getOnlyElement(plan.getJobs());
- assertEquals(4, job.getFiles().size());
- assertTrue(job.getFiles().stream().allMatch(f -> f.getEstimatedSize() == 1_000));
+ assertEquals(10, job.getFiles().size());
+ assertTrue(job.getFiles().stream().allMatch(f -> f.getEstimatedSize() == 11));
// try planning again incorporating the job returned from previous plan
var jobs2 = Sets.union(jobs, Set.copyOf(plan.getJobs()));
@@ -235,12 +296,12 @@ public class DefaultCompactionPlannerTest {
params = createPlanningParams(all, candidates2, jobs2, 2, CompactionKind.SYSTEM);
plan = planner.makePlan(params);
- // The two running jobs are over 10 and 1000 sized files. The jobs should exclude 100 and 10_000
- // sized files because they would produce a file in those size ranges. This leaves the 100_000
- // sized files available to compact.
+ // All 20 of the size 11 files are compacting, which would produce 2 files of size 110. There
+ // are 17 files of size 100; compacting 10 files of size 100 would not include the two projected
+ // files of size 110. Should find 10 files of size 100 to compact.
job = getOnlyElement(plan.getJobs());
- assertEquals(4, job.getFiles().size());
- assertTrue(job.getFiles().stream().allMatch(f -> f.getEstimatedSize() == 100_000));
+ assertEquals(10, job.getFiles().size());
+ assertTrue(job.getFiles().stream().allMatch(f -> f.getEstimatedSize() == 100));
// try planning again incorporating the job returned from previous plan
var jobs3 = Sets.union(jobs2, Set.copyOf(plan.getJobs()));
@@ -249,7 +310,20 @@ public class DefaultCompactionPlannerTest {
params = createPlanningParams(all, candidates3, jobs3, 2, CompactionKind.SYSTEM);
plan = planner.makePlan(params);
- // should find nothing to compact at this point
+ // Simulating multiple compactions forward, the next set of files that would not include files
+ // from any other projected or running compactions is the 9 files of size 10_000.
+ job = getOnlyElement(plan.getJobs());
+ assertEquals(9, job.getFiles().size());
+ assertTrue(job.getFiles().stream().allMatch(f -> f.getEstimatedSize() == 10_000));
+
+ var jobs4 = Sets.union(jobs3, Set.copyOf(plan.getJobs()));
+ var candidates4 = new HashSet<>(candidates3);
+ candidates4.removeAll(job.getFiles());
+ params = createPlanningParams(all, candidates4, jobs4, 2, CompactionKind.SYSTEM);
+ plan = planner.makePlan(params);
+
+ // The 5 files of size 100_000 should not be found because it would be optimal to compact
+ // those 5 files with the output of the compactions over the files of size 10_000.
assertEquals(0, plan.getJobs().size());
}
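Taken together, the test simulates compactions forward by feeding each
plan's jobs back in as running jobs until nothing is planned. A condensed
sketch of that driver loop, assuming the test class's own helpers
(createPlanningParams, planner) and hypothetical starting sets
(initialJobs, initialCandidates), so it is not standalone:

var jobs = new HashSet<>(initialJobs);
var candidates = new HashSet<>(initialCandidates);
while (true) {
  var params = createPlanningParams(all, candidates, jobs, 2, CompactionKind.SYSTEM);
  var plan = planner.makePlan(params);
  if (plan.getJobs().isEmpty()) {
    break; // the remaining files wait on projected outputs of running jobs
  }
  for (var job : plan.getJobs()) {
    jobs.add(job);                        // treat the planned job as running
    candidates.removeAll(job.getFiles()); // its inputs are no longer candidates
  }
}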