This is an automated email from the ASF dual-hosted git repository.
kturner pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push:
new bf2b89e1ec Fixed compaction planning search bug (#5620)
bf2b89e1ec is described below
commit bf2b89e1ecdef9de5ef4716e601d42c7286900b1
Author: Keith Turner <[email protected]>
AuthorDate: Mon Jun 9 15:59:29 2025 -0400
Fixed compaction planning search bug (#5620)
When a tablet has too many files and its not compacting with the
configured compaction ratio the planner would search for the highest
ratio that would cause a compaction. Encountered a problem with this
search were two constraints in the code would cause nothing to be found.
One constraint was a check to only accept compactions that satisfied
`filesToCompact.size() < goalCompactionSize`. The other constraint was
that `DefaultCompactionPlanner.findDataFilesToCompact()` would find the
largest set of small files to compact. Putting these two together in
some cases `findDataFilesToCompact()` would keep finding a set of files
smaller than `goalCompactionSize` and log a warning that it could not
find anything. However the set it did find would have been good to
compact.
Looking into fixing this came to the conclusion that the behavior of
`findDataFilesToCompact()` where it finds the largest set of small files
to compact is probably not optimal when varying the compaction ratio.
Also its probably best to not set a minimum size to compact, its ok if
it takes multiple compactions to resolve the issue. Changed the code to
find the files with highest ratio and dropped those two other
constraints.
Added a test that causes the two constraints to conflict and this new
test will not pass with the old code. Adjusted some of the existing
test because of the change in behavior.
Added a new config option to limit the minimum compaction ratio of the
search. Defaulted this to 1.1 which may be extreme, not sure. Files with
a difference in size of 10x will have a ratio of 1.1. This is roughly
the minimum ratio that the old code would search.
Co-authored-by: Dave Marion <[email protected]>
---
.../spi/compaction/DefaultCompactionPlanner.java | 98 ++++++++++++----------
.../compaction/DefaultCompactionPlannerTest.java | 55 ++++++++----
2 files changed, 90 insertions(+), 63 deletions(-)
diff --git
a/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java
b/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java
index a0d8a7872f..e0985329eb 100644
---
a/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java
+++
b/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java
@@ -124,8 +124,9 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
* </ol>
* For example, given a tablet with 20 files, and table.file.max is 15 and no
compactions are
* planned. If the compaction ratio is set to 3, then this plugin will find
the largest compaction
- * ratio less than 3 that results in a compaction.
- *
+ * ratio less than 3 that results in a compaction. The lowest compaction ratio
that will be
+ * considered in this search defaults to 1.1. Starting in 2.1.4, the lower
bound for the search can
+ * be set using {@code
tserver.compaction.major.service.<service>.opts.lowestRatio}
*
* @since 2.1.0
* @see org.apache.accumulo.core.spi.compaction
@@ -165,6 +166,7 @@ public class DefaultCompactionPlanner implements
CompactionPlanner {
private List<Executor> executors;
private int maxFilesToCompact;
+ private double lowestRatio;
@SuppressFBWarnings(value = {"UWF_UNWRITTEN_FIELD", "NP_UNWRITTEN_FIELD"},
justification = "Field is written by Gson")
@@ -229,6 +231,10 @@ public class DefaultCompactionPlanner implements
CompactionPlanner {
"Duplicate maxSize set in executors. " +
params.getOptions().get("executors"));
}
});
+
+ lowestRatio =
Double.parseDouble(params.getOptions().getOrDefault("lowestRatio", "1.1"));
+ Preconditions.checkArgument(lowestRatio >= 1.0, "lowestRatio must be >=
1.0 not %s",
+ lowestRatio);
} else {
throw new IllegalStateException("No defined executors for this planner");
}
@@ -368,52 +374,54 @@ public class DefaultCompactionPlanner implements
CompactionPlanner {
*/
private Collection<CompactableFile>
findFilesToCompactWithLowerRatio(PlanningParameters params,
long maxSizeToCompact, int maxTabletFiles) {
- double lowRatio = 1.0;
- double highRatio = params.getRatio();
-
- Preconditions.checkArgument(highRatio >= lowRatio);
var candidates = Set.copyOf(params.getCandidates());
- Collection<CompactableFile> found = Set.of();
-
- int goalCompactionSize = candidates.size() - maxTabletFiles + 1;
- if (goalCompactionSize > maxFilesToCompact) {
- // The tablet is way over max tablet files, so multiple compactions will
be needed. Therefore,
- // do not set a goal size for this compaction and find the largest
compaction ratio that will
- // compact some set of files.
- goalCompactionSize = 0;
- }
-
- // Do a binary search of the compaction ratios.
- while (highRatio - lowRatio > .1) {
- double ratioToCheck = (highRatio - lowRatio) / 2 + lowRatio;
-
- // This is continually resorting the list of files in the following
call, could optimize this
- var filesToCompact =
- findDataFilesToCompact(candidates, ratioToCheck, maxFilesToCompact,
maxSizeToCompact);
-
- log.trace("Tried ratio {} and found {} {} {}", ratioToCheck,
filesToCompact,
- filesToCompact.size() >= goalCompactionSize, goalCompactionSize);
+ List<CompactableFile> sortedFiles = sortAndLimitByMaxSize(candidates,
maxSizeToCompact);
+
+ List<CompactableFile> found = List.of();
+ double largestRatioSeen = Double.MIN_VALUE;
+
+ if (sortedFiles.size() > 1) {
+ int windowStart = 0;
+ int windowEnd = Math.min(sortedFiles.size(), maxFilesToCompact);
+
+ while (windowEnd <= sortedFiles.size()) {
+ var filesInWindow = sortedFiles.subList(windowStart, windowEnd);
+
+ long sum = filesInWindow.get(0).getEstimatedSize();
+ for (int i = 1; i < filesInWindow.size(); i++) {
+ long size = filesInWindow.get(i).getEstimatedSize();
+ sum += size;
+ if (size > 0) {
+ // This is the compaction ratio needed to compact these files
+ double neededCompactionRatio = sum / (double) size;
+ log.trace("neededCompactionRatio:{} files:{}",
neededCompactionRatio,
+ filesInWindow.subList(0, i + 1));
+ if (neededCompactionRatio >= largestRatioSeen) {
+ largestRatioSeen = neededCompactionRatio;
+ found = filesInWindow.subList(0, i + 1);
+ }
+ } else {
+ log.warn("Unexpected size seen for file {} {} {}",
params.getTabletId(),
+ filesInWindow.get(i).getFileName(), size);
+ }
+ }
- if (filesToCompact.isEmpty() || filesToCompact.size() <
goalCompactionSize) {
- highRatio = ratioToCheck;
- } else {
- lowRatio = ratioToCheck;
- found = filesToCompact;
+ windowStart++;
+ windowEnd++;
}
- }
+ } // else all of the files are too large
- if (found.isEmpty() && lowRatio == 1.0) {
+ if (found.isEmpty() || largestRatioSeen <= lowestRatio) {
var examinedFiles = sortAndLimitByMaxSize(candidates, maxSizeToCompact);
var excludedBecauseMaxSize = candidates.size() - examinedFiles.size();
var tabletId = params.getTabletId();
- log.warn(
- "Unable to plan compaction for {} that has too many files. {}:{}
num_files:{} "
- + "excluded_large_files:{} max_compaction_size:{}
ratio_search_range:{},{} ",
+ log.warn("Unable to plan compaction for {} that has too many files.
{}:{} num_files:{} "
+ + "excluded_large_files:{} max_compaction_size:{} ratio:{}
largestRatioSeen:{} lowestRatio:{}",
tabletId, Property.TABLE_FILE_MAX.getKey(), maxTabletFiles,
candidates.size(),
- excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact),
highRatio,
- params.getRatio());
+ excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact),
params.getRatio(),
+ largestRatioSeen, lowestRatio);
if (log.isDebugEnabled()) {
var sizesOfExamined = examinedFiles.stream()
.map(compactableFile ->
NumUtil.bigNumberForSize(compactableFile.getEstimatedSize()))
@@ -426,14 +434,14 @@ public class DefaultCompactionPlanner implements
CompactionPlanner {
log.debug("Failed planning details for {} examined_file_sizes:{}
excluded_file_sizes:{}",
tabletId, sizesOfExamined, sizesOfExcluded);
}
+ found = List.of();
+ } else {
+ log.info(
+ "For {} found {} files to compact lowering compaction ratio from {}
to {} because the tablet "
+ + "exceeded {} files, it had {}",
+ params.getTabletId(), found.size(), params.getRatio(),
largestRatioSeen, maxTabletFiles,
+ params.getCandidates().size());
}
-
- log.info(
- "For {} found {} files to compact lowering compaction ratio from {} to
{} because the tablet "
- + "exceeded {} files, it had {}",
- params.getTabletId(), found.size(), params.getRatio(), lowRatio,
maxTabletFiles,
- params.getCandidates().size());
-
return found;
}
diff --git
a/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java
b/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java
index 4e5f1c4d86..b2cf3287f5 100644
---
a/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java
+++
b/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java
@@ -525,13 +525,14 @@ public class DefaultCompactionPlannerTest {
overrides.put(Property.TABLE_FILE_MAX.getKey(), "7");
var conf = new
ConfigurationImpl(SiteConfiguration.empty().withOverrides(overrides).build());
- // For this case need to compact three files and the highest ratio that
achieves that is 1.8
+ // The highest ratio is 1.9 so should compact this. Will need a subsequent
compaction to bring
+ // it below the limit.
var planner = createPlanner(conf, executors);
var all = createCFs(1000, 1.1, 1.9, 1.8, 1.6, 1.3, 1.4, 1.3, 1.2, 1.1);
var params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
var plan = planner.makePlan(params);
var job = getOnlyElement(plan.getJobs());
- assertEquals(createCFs(1000, 1.1, 1.9, 1.8), job.getFiles());
+ assertEquals(createCFs(1000, 1.1, 1.9), job.getFiles());
// For this case need to compact two files and the highest ratio that
achieves that is 2.9
all = createCFs(1000, 2, 2.9, 2.8, 2.7, 2.6, 2.5, 2.4, 2.3);
@@ -564,21 +565,26 @@ public class DefaultCompactionPlannerTest {
assertEquals(createCFs(1000, 1.9), job.getFiles());
}
- // In this case the tablet can be brought below the max limit in single
compaction, so it should
- // find this
+ // The max compaction ratio is the first two files, so should compact
these. Will require
+ // multiple compactions to bring the tablet below the limit.
all =
createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.5, 1.2, 1.1, 1.1, 1.1,
1.1, 1.1, 1.1, 1.1);
params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
plan = planner.makePlan(params);
job = getOnlyElement(plan.getJobs());
- assertEquals(createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.5, 1.2, 1.1),
job.getFiles());
+ assertEquals(createCFs(1000, 1.9), job.getFiles());
+
+ all = createCFs(10, 1.3, 2.2, 2.51, 1.02, 1.7, 2.54, 2.3, 1.7, 1.5, 1.4);
+ params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
+ plan = planner.makePlan(params);
+ job = getOnlyElement(plan.getJobs());
+ assertEquals(createCFs(10, 1.3, 2.2, 2.51, 1.02, 1.7, 2.54),
job.getFiles());
// each file is 10x the size of the file smaller than it
all = createCFs(10, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1);
params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
plan = planner.makePlan(params);
- job = getOnlyElement(plan.getJobs());
- assertEquals(createCFs(10, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1),
job.getFiles());
+ assertTrue(plan.getJobs().isEmpty());
// test with some files growing 20x, ensure those are not included
for (var ratio : List.of(1.9, 2.0, 3.0, 4.0)) {
@@ -588,7 +594,6 @@ public class DefaultCompactionPlannerTest {
job = getOnlyElement(plan.getJobs());
assertEquals(createCFs(10, 1.05, 1.05, 1.25, 1.75), job.getFiles());
}
-
}
@Test
@@ -621,12 +626,24 @@ public class DefaultCompactionPlannerTest {
assertTrue(plan.getJobs().isEmpty());
- // a really bad situation, each file is 20 times the size of its smaller
file. The algorithm
- // does not search that for ratios that low.
- all = createCFs(10, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05);
+ // a really bad situation, each file is 20 times the size of its smaller
file. By default, the
+ // algorithm does not search that for ratios that low.
+ all = createCFs(3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05);
params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
plan = planner.makePlan(params);
assertTrue(plan.getJobs().isEmpty());
+
+ // adjust the config for the lowest search ratio and recreate the planner,
this should allow a
+ // compaction to happen
+ var overrides2 = new HashMap<>(overrides);
+ overrides2.put(
+ Property.TSERV_COMPACTION_SERVICE_PREFIX.getKey() +
"cs1.planner.opts.lowestRatio", "1.04");
+ var conf2 = new
ConfigurationImpl(SiteConfiguration.empty().withOverrides(overrides2).build());
+ var planner2 = createPlanner(conf2, executors);
+ params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
+ plan = planner2.makePlan(params);
+ var job2 = getOnlyElement(plan.getJobs());
+ assertEquals(createCFs(3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05),
job2.getFiles());
}
// Test to ensure that plugin falls back from TABLE_FILE_MAX to
TSERV_SCAN_MAX_OPENFILES
@@ -648,7 +665,7 @@ public class DefaultCompactionPlannerTest {
var params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
var plan = planner.makePlan(params);
var job = getOnlyElement(plan.getJobs());
- assertEquals(createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4),
job.getFiles());
+ assertEquals(createCFs(1000, 1.9), job.getFiles());
}
private CompactionJob createJob(CompactionKind kind, Set<CompactableFile>
all,
@@ -811,14 +828,16 @@ public class DefaultCompactionPlannerTest {
private static CompactionPlanner.InitParameters getInitParams(Configuration
conf,
String executors) {
- String maxOpen =
- conf.get(Property.TSERV_COMPACTION_SERVICE_PREFIX.getKey() +
"cs1.planner.opts.maxOpen");
Map<String,String> options = new HashMap<>();
- options.put("executors", executors.replaceAll("'", "\""));
- if (maxOpen != null) {
- options.put("maxOpen", maxOpen);
- }
+ String prefix = Property.TSERV_COMPACTION_SERVICE_PREFIX.getKey() +
"cs1.planner.opts.";
+
+ conf.forEach(e -> {
+ if (e.getKey().startsWith(prefix)) {
+ options.put(e.getKey().substring(prefix.length()), e.getValue());
+ }
+ });
+ options.put("executors", executors.replaceAll("'", "\""));
ServiceEnvironment senv = EasyMock.createMock(ServiceEnvironment.class);
EasyMock.expect(senv.getConfiguration()).andReturn(conf).anyTimes();