This is an automated email from the ASF dual-hosted git repository.
kturner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push:
new 22c3e23695 Fixed compaction planning search bug (#5620)
22c3e23695 is described below
commit 22c3e23695d3c4c6c8b8812b69f877df161c8615
Author: Keith Turner <[email protected]>
AuthorDate: Mon Jun 9 15:59:29 2025 -0400
Fixed compaction planning search bug (#5620)
Pulled changes from bf2b89e1ec #5620 in 2.1 into main and resolved
conflicts. This partially resolves #5634
---
.../compaction/RatioBasedCompactionPlanner.java | 99 ++++++++++++----------
.../RatioBasedCompactionPlannerTest.java | 57 +++++++++----
2 files changed, 95 insertions(+), 61 deletions(-)
diff --git
a/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java
b/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java
index 0bfdd77589..d6ce07c4f8 100644
---
a/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java
+++
b/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java
@@ -118,9 +118,11 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
* </ol>
* For example, given a tablet with 20 files, and table.file.max is 15 and no
compactions are
* planned. If the compaction ratio is set to 3, then this plugin will find
the largest compaction
- * ratio less than 3 that results in a compaction.
+ * ratio less than 3 that results in a compaction. The lowest compaction ratio
that will be
+ * considered in this search defaults to 1.1. Starting in 2.1.4, the lower
bound for the search can
+ * be set using {@code
tserver.compaction.major.service.<service>.opts.lowestRatio}
*
- * @since 3.1.0
+ * @since 4.0.0
* @see org.apache.accumulo.core.spi.compaction
*/
@@ -171,6 +173,7 @@ public class RatioBasedCompactionPlanner implements
CompactionPlanner {
private List<CompactionGroup> groups;
private int maxFilesToCompact;
+ private double lowestRatio;
@SuppressFBWarnings(value = {"UWF_UNWRITTEN_FIELD", "NP_UNWRITTEN_FIELD"},
justification = "Field is written by Gson")
@@ -223,6 +226,10 @@ public class RatioBasedCompactionPlanner implements
CompactionPlanner {
}
});
+ lowestRatio =
Double.parseDouble(params.getOptions().getOrDefault("lowestRatio", "1.1"));
+ Preconditions.checkArgument(lowestRatio >= 1.0, "lowestRatio must be >=
1.0 not %s",
+ lowestRatio);
+
determineMaxFilesToCompact(params);
}
@@ -379,52 +386,54 @@ public class RatioBasedCompactionPlanner implements
CompactionPlanner {
*/
private List<Collection<CompactableFile>> findFilesToCompactWithLowerRatio(
PlanningParameters params, long maxSizeToCompact, int maxTabletFiles) {
- double lowRatio = 1.0;
- double highRatio = params.getRatio();
-
- Preconditions.checkArgument(highRatio >= lowRatio);
var candidates = Set.copyOf(params.getCandidates());
- Collection<CompactableFile> found = Set.of();
-
- int goalCompactionSize = candidates.size() - maxTabletFiles + 1;
- if (goalCompactionSize > maxFilesToCompact) {
- // The tablet is way over max tablet files, so multiple compactions will
be needed. Therefore,
- // do not set a goal size for this compaction and find the largest
compaction ratio that will
- // compact some set of files.
- goalCompactionSize = 0;
- }
-
- // Do a binary search of the compaction ratios.
- while (highRatio - lowRatio > .1) {
- double ratioToCheck = (highRatio - lowRatio) / 2 + lowRatio;
-
- // This is continually resorting the list of files in the following
call, could optimize this
- var filesToCompact =
- findDataFilesToCompact(candidates, ratioToCheck, maxFilesToCompact,
maxSizeToCompact);
-
- log.trace("Tried ratio {} and found {} {} {} {}", ratioToCheck,
filesToCompact,
- filesToCompact.size() >= goalCompactionSize, goalCompactionSize,
maxFilesToCompact);
+ List<CompactableFile> sortedFiles = sortAndLimitByMaxSize(candidates,
maxSizeToCompact);
+
+ List<CompactableFile> found = List.of();
+ double largestRatioSeen = Double.MIN_VALUE;
+
+ if (sortedFiles.size() > 1) {
+ int windowStart = 0;
+ int windowEnd = Math.min(sortedFiles.size(), maxFilesToCompact);
+
+ while (windowEnd <= sortedFiles.size()) {
+ var filesInWindow = sortedFiles.subList(windowStart, windowEnd);
+
+ long sum = filesInWindow.get(0).getEstimatedSize();
+ for (int i = 1; i < filesInWindow.size(); i++) {
+ long size = filesInWindow.get(i).getEstimatedSize();
+ sum += size;
+ if (size > 0) {
+ // This is the compaction ratio needed to compact these files
+ double neededCompactionRatio = sum / (double) size;
+ log.trace("neededCompactionRatio:{} files:{}",
neededCompactionRatio,
+ filesInWindow.subList(0, i + 1));
+ if (neededCompactionRatio >= largestRatioSeen) {
+ largestRatioSeen = neededCompactionRatio;
+ found = filesInWindow.subList(0, i + 1);
+ }
+ } else {
+ log.warn("Unexpected size seen for file {} {} {}",
params.getTabletId(),
+ filesInWindow.get(i).getFileName(), size);
+ }
+ }
- if (filesToCompact.isEmpty() || filesToCompact.size() <
goalCompactionSize) {
- highRatio = ratioToCheck;
- } else {
- lowRatio = ratioToCheck;
- found = filesToCompact;
+ windowStart++;
+ windowEnd++;
}
- }
+ } // else all of the files are too large
- if (found.isEmpty() && lowRatio == 1.0) {
+ if (found.isEmpty() || largestRatioSeen <= lowestRatio) {
var examinedFiles = sortAndLimitByMaxSize(candidates, maxSizeToCompact);
var excludedBecauseMaxSize = candidates.size() - examinedFiles.size();
var tabletId = params.getTabletId();
- log.warn(
- "Unable to plan compaction for {} that has too many files. {}:{}
num_files:{} "
- + "excluded_large_files:{} max_compaction_size:{}
ratio_search_range:{},{} ",
+ log.warn("Unable to plan compaction for {} that has too many files.
{}:{} num_files:{} "
+ + "excluded_large_files:{} max_compaction_size:{} ratio:{}
largestRatioSeen:{} lowestRatio:{}",
tabletId, Property.TABLE_FILE_MAX.getKey(), maxTabletFiles,
candidates.size(),
- excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact),
highRatio,
- params.getRatio());
+ excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact),
params.getRatio(),
+ largestRatioSeen, lowestRatio);
if (log.isDebugEnabled()) {
var sizesOfExamined = examinedFiles.stream()
.map(compactableFile ->
NumUtil.bigNumberForSize(compactableFile.getEstimatedSize()))
@@ -437,14 +446,14 @@ public class RatioBasedCompactionPlanner implements
CompactionPlanner {
log.debug("Failed planning details for {} examined_file_sizes:{}
excluded_file_sizes:{}",
tabletId, sizesOfExamined, sizesOfExcluded);
}
+ found = List.of();
+ } else {
+ log.info(
+ "For {} found {} files to compact lowering compaction ratio from {}
to {} because the tablet "
+ + "exceeded {} files, it had {}",
+ params.getTabletId(), found.size(), params.getRatio(),
largestRatioSeen, maxTabletFiles,
+ params.getCandidates().size());
}
-
- log.info(
- "For {} found {} files to compact lowering compaction ratio from {} to
{} because the tablet "
- + "exceeded {} files, it had {}",
- params.getTabletId(), found.size(), params.getRatio(), lowRatio,
maxTabletFiles,
- params.getCandidates().size());
-
if (found.isEmpty()) {
return List.of();
} else {
diff --git
a/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java
b/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java
index 7484602e92..5c18325a34 100644
---
a/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java
+++
b/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java
@@ -42,6 +42,7 @@ import
org.apache.accumulo.core.client.admin.compaction.CompactableFile;
import org.apache.accumulo.core.clientImpl.Namespace;
import org.apache.accumulo.core.conf.ConfigurationTypeHelper;
import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.conf.SiteConfiguration;
import org.apache.accumulo.core.data.NamespaceId;
import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.data.TabletId;
@@ -51,6 +52,7 @@ import org.apache.accumulo.core.spi.common.ServiceEnvironment;
import org.apache.accumulo.core.spi.common.ServiceEnvironment.Configuration;
import org.apache.accumulo.core.spi.compaction.CompactionPlan.Builder;
import
org.apache.accumulo.core.spi.compaction.CompactionPlanner.InitParameters;
+import org.apache.accumulo.core.util.ConfigurationImpl;
import org.apache.accumulo.core.util.compaction.CompactionJobImpl;
import org.apache.accumulo.core.util.compaction.CompactionJobPrioritizer;
import org.apache.accumulo.core.util.compaction.CompactionPlanImpl;
@@ -546,13 +548,14 @@ public class RatioBasedCompactionPlannerTest {
overrides.put(Property.TABLE_FILE_MAX.getKey(), "7");
var conf = ServiceEnvironment.Configuration.from(overrides, false);
- // For this case need to compact three files and the highest ratio that
achieves that is 1.8
+ // The highest ratio is 1.9 so should compact this. Will need a subsequent
compaction to bring
+ // it below the limit.
var planner = createPlanner(conf, groups);
var all = createCFs(1000, 1.1, 1.9, 1.8, 1.6, 1.3, 1.4, 1.3, 1.2, 1.1);
var params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
var plan = planner.makePlan(params);
var job = getOnlyElement(plan.getJobs());
- assertEquals(createCFs(1000, 1.1, 1.9, 1.8), job.getFiles());
+ assertEquals(createCFs(1000, 1.1, 1.9), job.getFiles());
// For this case need to compact two files and the highest ratio that
achieves that is 2.9
all = createCFs(1000, 2, 2.9, 2.8, 2.7, 2.6, 2.5, 2.4, 2.3);
@@ -585,21 +588,26 @@ public class RatioBasedCompactionPlannerTest {
assertEquals(createCFs(1000, 1.9), job.getFiles());
}
- // In this case the tablet can be brought below the max limit in single
compaction, so it should
- // find this
+ // The max compaction ratio is the first two files, so should compact
these. Will require
+ // multiple compactions to bring the tablet below the limit.
all =
createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.5, 1.2, 1.1, 1.1, 1.1,
1.1, 1.1, 1.1, 1.1);
params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
plan = planner.makePlan(params);
job = getOnlyElement(plan.getJobs());
- assertEquals(createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.5, 1.2, 1.1),
job.getFiles());
+ assertEquals(createCFs(1000, 1.9), job.getFiles());
+
+ all = createCFs(10, 1.3, 2.2, 2.51, 1.02, 1.7, 2.54, 2.3, 1.7, 1.5, 1.4);
+ params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
+ plan = planner.makePlan(params);
+ job = getOnlyElement(plan.getJobs());
+ assertEquals(createCFs(10, 1.3, 2.2, 2.51, 1.02, 1.7, 2.54),
job.getFiles());
// each file is 10x the size of the file smaller than it
all = createCFs(10, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1);
params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
plan = planner.makePlan(params);
- job = getOnlyElement(plan.getJobs());
- assertEquals(createCFs(10, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1),
job.getFiles());
+ assertTrue(plan.getJobs().isEmpty());
// test with some files growing 20x, ensure those are not included
for (var ratio : List.of(1.9, 2.0, 3.0, 4.0)) {
@@ -609,7 +617,6 @@ public class RatioBasedCompactionPlannerTest {
job = getOnlyElement(plan.getJobs());
assertEquals(createCFs(10, 1.05, 1.05, 1.25, 1.75), job.getFiles());
}
-
}
@Test
@@ -640,12 +647,24 @@ public class RatioBasedCompactionPlannerTest {
assertTrue(plan.getJobs().isEmpty());
- // a really bad situation, each file is 20 times the size of its smaller
file. The algorithm
- // does not search that for ratios that low.
- all = createCFs(10, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05);
+ // a really bad situation, each file is 20 times the size of its smaller
file. By default, the
+ // algorithm does not search that for ratios that low.
+ all = createCFs(3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05);
params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
plan = planner.makePlan(params);
assertTrue(plan.getJobs().isEmpty());
+
+ // adjust the config for the lowest search ratio and recreate the planner,
this should allow a
+ // compaction to happen
+ var overrides2 = new HashMap<>(overrides);
+ overrides2.put(Property.COMPACTION_SERVICE_PREFIX.getKey() +
"cs1.planner.opts.lowestRatio",
+ "1.04");
+ var conf2 = new
ConfigurationImpl(SiteConfiguration.empty().withOverrides(overrides2).build());
+ var planner2 = createPlanner(conf2, groups);
+ params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
+ plan = planner2.makePlan(params);
+ var job2 = getOnlyElement(plan.getJobs());
+ assertEquals(createCFs(3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05),
job2.getFiles());
}
// Test to ensure that plugin falls back from TABLE_FILE_MAX to
TSERV_SCAN_MAX_OPENFILES
@@ -665,7 +684,7 @@ public class RatioBasedCompactionPlannerTest {
var params = createPlanningParams(all, all, Set.of(), 3,
CompactionKind.SYSTEM, conf);
var plan = planner.makePlan(params);
var job = getOnlyElement(plan.getJobs());
- assertEquals(createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4),
job.getFiles());
+ assertEquals(createCFs(1000, 1.9), job.getFiles());
}
private CompactionJob createJob(CompactionKind kind, Set<CompactableFile>
all,
@@ -831,13 +850,19 @@ public class RatioBasedCompactionPlannerTest {
}
private static CompactionPlanner.InitParameters getInitParams(Configuration
conf, String groups) {
- String maxOpen = conf.get(prefix + "cs1.planner.opts.maxOpen");
+
Map<String,String> options = new HashMap<>();
+
+ var optsPrefix = prefix + "cs1.planner.opts.";
+
+ conf.forEach(e -> {
+ if (e.getKey().startsWith(optsPrefix)) {
+ options.put(e.getKey().substring(optsPrefix.length()), e.getValue());
+ }
+ });
options.put("groups", groups.replaceAll("'", "\""));
- if (maxOpen != null) {
- options.put("maxOpen", maxOpen);
- } else {
+ if (!options.containsKey("maxOpen")) {
options.put("maxOpen", "15");
}