This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push: new 22c3e23695 Fixed compaction planning search bug (#5620) 22c3e23695 is described below commit 22c3e23695d3c4c6c8b8812b69f877df161c8615 Author: Keith Turner <ktur...@apache.org> AuthorDate: Mon Jun 9 15:59:29 2025 -0400 Fixed compaction planning search bug (#5620) Pulled changes from bf2b89e1ec #5620 in 2.1 into main and resolved conflicts. This partially resolves #5634 --- .../compaction/RatioBasedCompactionPlanner.java | 99 ++++++++++++---------- .../RatioBasedCompactionPlannerTest.java | 57 +++++++++---- 2 files changed, 95 insertions(+), 61 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java b/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java index 0bfdd77589..d6ce07c4f8 100644 --- a/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java +++ b/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java @@ -118,9 +118,11 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; * </ol> * For example, given a tablet with 20 files, and table.file.max is 15 and no compactions are * planned. If the compaction ratio is set to 3, then this plugin will find the largest compaction - * ratio less than 3 that results in a compaction. + * ratio less than 3 that results in a compaction. The lowest compaction ratio that will be + * considered in this search defaults to 1.1. Starting in 2.1.4, the lower bound for the search can + * be set using {@code tserver.compaction.major.service.<service>.opts.lowestRatio} * - * @since 3.1.0 + * @since 4.0.0 * @see org.apache.accumulo.core.spi.compaction */ @@ -171,6 +173,7 @@ public class RatioBasedCompactionPlanner implements CompactionPlanner { private List<CompactionGroup> groups; private int maxFilesToCompact; + private double lowestRatio; @SuppressFBWarnings(value = {"UWF_UNWRITTEN_FIELD", "NP_UNWRITTEN_FIELD"}, justification = "Field is written by Gson") @@ -223,6 +226,10 @@ public class RatioBasedCompactionPlanner implements CompactionPlanner { } }); + lowestRatio = Double.parseDouble(params.getOptions().getOrDefault("lowestRatio", "1.1")); + Preconditions.checkArgument(lowestRatio >= 1.0, "lowestRatio must be >= 1.0 not %s", + lowestRatio); + determineMaxFilesToCompact(params); } @@ -379,52 +386,54 @@ public class RatioBasedCompactionPlanner implements CompactionPlanner { */ private List<Collection<CompactableFile>> findFilesToCompactWithLowerRatio( PlanningParameters params, long maxSizeToCompact, int maxTabletFiles) { - double lowRatio = 1.0; - double highRatio = params.getRatio(); - - Preconditions.checkArgument(highRatio >= lowRatio); var candidates = Set.copyOf(params.getCandidates()); - Collection<CompactableFile> found = Set.of(); - - int goalCompactionSize = candidates.size() - maxTabletFiles + 1; - if (goalCompactionSize > maxFilesToCompact) { - // The tablet is way over max tablet files, so multiple compactions will be needed. Therefore, - // do not set a goal size for this compaction and find the largest compaction ratio that will - // compact some set of files. - goalCompactionSize = 0; - } - - // Do a binary search of the compaction ratios. - while (highRatio - lowRatio > .1) { - double ratioToCheck = (highRatio - lowRatio) / 2 + lowRatio; - - // This is continually resorting the list of files in the following call, could optimize this - var filesToCompact = - findDataFilesToCompact(candidates, ratioToCheck, maxFilesToCompact, maxSizeToCompact); - - log.trace("Tried ratio {} and found {} {} {} {}", ratioToCheck, filesToCompact, - filesToCompact.size() >= goalCompactionSize, goalCompactionSize, maxFilesToCompact); + List<CompactableFile> sortedFiles = sortAndLimitByMaxSize(candidates, maxSizeToCompact); + + List<CompactableFile> found = List.of(); + double largestRatioSeen = Double.MIN_VALUE; + + if (sortedFiles.size() > 1) { + int windowStart = 0; + int windowEnd = Math.min(sortedFiles.size(), maxFilesToCompact); + + while (windowEnd <= sortedFiles.size()) { + var filesInWindow = sortedFiles.subList(windowStart, windowEnd); + + long sum = filesInWindow.get(0).getEstimatedSize(); + for (int i = 1; i < filesInWindow.size(); i++) { + long size = filesInWindow.get(i).getEstimatedSize(); + sum += size; + if (size > 0) { + // This is the compaction ratio needed to compact these files + double neededCompactionRatio = sum / (double) size; + log.trace("neededCompactionRatio:{} files:{}", neededCompactionRatio, + filesInWindow.subList(0, i + 1)); + if (neededCompactionRatio >= largestRatioSeen) { + largestRatioSeen = neededCompactionRatio; + found = filesInWindow.subList(0, i + 1); + } + } else { + log.warn("Unexpected size seen for file {} {} {}", params.getTabletId(), + filesInWindow.get(i).getFileName(), size); + } + } - if (filesToCompact.isEmpty() || filesToCompact.size() < goalCompactionSize) { - highRatio = ratioToCheck; - } else { - lowRatio = ratioToCheck; - found = filesToCompact; + windowStart++; + windowEnd++; } - } + } // else all of the files are too large - if (found.isEmpty() && lowRatio == 1.0) { + if (found.isEmpty() || largestRatioSeen <= lowestRatio) { var examinedFiles = sortAndLimitByMaxSize(candidates, maxSizeToCompact); var excludedBecauseMaxSize = candidates.size() - examinedFiles.size(); var tabletId = params.getTabletId(); - log.warn( - "Unable to plan compaction for {} that has too many files. {}:{} num_files:{} " - + "excluded_large_files:{} max_compaction_size:{} ratio_search_range:{},{} ", + log.warn("Unable to plan compaction for {} that has too many files. {}:{} num_files:{} " + + "excluded_large_files:{} max_compaction_size:{} ratio:{} largestRatioSeen:{} lowestRatio:{}", tabletId, Property.TABLE_FILE_MAX.getKey(), maxTabletFiles, candidates.size(), - excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact), highRatio, - params.getRatio()); + excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact), params.getRatio(), + largestRatioSeen, lowestRatio); if (log.isDebugEnabled()) { var sizesOfExamined = examinedFiles.stream() .map(compactableFile -> NumUtil.bigNumberForSize(compactableFile.getEstimatedSize())) @@ -437,14 +446,14 @@ public class RatioBasedCompactionPlanner implements CompactionPlanner { log.debug("Failed planning details for {} examined_file_sizes:{} excluded_file_sizes:{}", tabletId, sizesOfExamined, sizesOfExcluded); } + found = List.of(); + } else { + log.info( + "For {} found {} files to compact lowering compaction ratio from {} to {} because the tablet " + + "exceeded {} files, it had {}", + params.getTabletId(), found.size(), params.getRatio(), largestRatioSeen, maxTabletFiles, + params.getCandidates().size()); } - - log.info( - "For {} found {} files to compact lowering compaction ratio from {} to {} because the tablet " - + "exceeded {} files, it had {}", - params.getTabletId(), found.size(), params.getRatio(), lowRatio, maxTabletFiles, - params.getCandidates().size()); - if (found.isEmpty()) { return List.of(); } else { diff --git a/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java b/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java index 7484602e92..5c18325a34 100644 --- a/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java +++ b/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java @@ -42,6 +42,7 @@ import org.apache.accumulo.core.client.admin.compaction.CompactableFile; import org.apache.accumulo.core.clientImpl.Namespace; import org.apache.accumulo.core.conf.ConfigurationTypeHelper; import org.apache.accumulo.core.conf.Property; +import org.apache.accumulo.core.conf.SiteConfiguration; import org.apache.accumulo.core.data.NamespaceId; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.TabletId; @@ -51,6 +52,7 @@ import org.apache.accumulo.core.spi.common.ServiceEnvironment; import org.apache.accumulo.core.spi.common.ServiceEnvironment.Configuration; import org.apache.accumulo.core.spi.compaction.CompactionPlan.Builder; import org.apache.accumulo.core.spi.compaction.CompactionPlanner.InitParameters; +import org.apache.accumulo.core.util.ConfigurationImpl; import org.apache.accumulo.core.util.compaction.CompactionJobImpl; import org.apache.accumulo.core.util.compaction.CompactionJobPrioritizer; import org.apache.accumulo.core.util.compaction.CompactionPlanImpl; @@ -546,13 +548,14 @@ public class RatioBasedCompactionPlannerTest { overrides.put(Property.TABLE_FILE_MAX.getKey(), "7"); var conf = ServiceEnvironment.Configuration.from(overrides, false); - // For this case need to compact three files and the highest ratio that achieves that is 1.8 + // The highest ratio is 1.9 so should compact this. Will need a subsequent compaction to bring + // it below the limit. var planner = createPlanner(conf, groups); var all = createCFs(1000, 1.1, 1.9, 1.8, 1.6, 1.3, 1.4, 1.3, 1.2, 1.1); var params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf); var plan = planner.makePlan(params); var job = getOnlyElement(plan.getJobs()); - assertEquals(createCFs(1000, 1.1, 1.9, 1.8), job.getFiles()); + assertEquals(createCFs(1000, 1.1, 1.9), job.getFiles()); // For this case need to compact two files and the highest ratio that achieves that is 2.9 all = createCFs(1000, 2, 2.9, 2.8, 2.7, 2.6, 2.5, 2.4, 2.3); @@ -585,21 +588,26 @@ public class RatioBasedCompactionPlannerTest { assertEquals(createCFs(1000, 1.9), job.getFiles()); } - // In this case the tablet can be brought below the max limit in single compaction, so it should - // find this + // The max compaction ratio is the first two files, so should compact these. Will require + // multiple compactions to bring the tablet below the limit. all = createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.5, 1.2, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1); params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf); plan = planner.makePlan(params); job = getOnlyElement(plan.getJobs()); - assertEquals(createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.5, 1.2, 1.1), job.getFiles()); + assertEquals(createCFs(1000, 1.9), job.getFiles()); + + all = createCFs(10, 1.3, 2.2, 2.51, 1.02, 1.7, 2.54, 2.3, 1.7, 1.5, 1.4); + params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf); + plan = planner.makePlan(params); + job = getOnlyElement(plan.getJobs()); + assertEquals(createCFs(10, 1.3, 2.2, 2.51, 1.02, 1.7, 2.54), job.getFiles()); // each file is 10x the size of the file smaller than it all = createCFs(10, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1); params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf); plan = planner.makePlan(params); - job = getOnlyElement(plan.getJobs()); - assertEquals(createCFs(10, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1), job.getFiles()); + assertTrue(plan.getJobs().isEmpty()); // test with some files growing 20x, ensure those are not included for (var ratio : List.of(1.9, 2.0, 3.0, 4.0)) { @@ -609,7 +617,6 @@ public class RatioBasedCompactionPlannerTest { job = getOnlyElement(plan.getJobs()); assertEquals(createCFs(10, 1.05, 1.05, 1.25, 1.75), job.getFiles()); } - } @Test @@ -640,12 +647,24 @@ public class RatioBasedCompactionPlannerTest { assertTrue(plan.getJobs().isEmpty()); - // a really bad situation, each file is 20 times the size of its smaller file. The algorithm - // does not search that for ratios that low. - all = createCFs(10, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05); + // a really bad situation, each file is 20 times the size of its smaller file. By default, the + // algorithm does not search that for ratios that low. + all = createCFs(3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05); params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf); plan = planner.makePlan(params); assertTrue(plan.getJobs().isEmpty()); + + // adjust the config for the lowest search ratio and recreate the planner, this should allow a + // compaction to happen + var overrides2 = new HashMap<>(overrides); + overrides2.put(Property.COMPACTION_SERVICE_PREFIX.getKey() + "cs1.planner.opts.lowestRatio", + "1.04"); + var conf2 = new ConfigurationImpl(SiteConfiguration.empty().withOverrides(overrides2).build()); + var planner2 = createPlanner(conf2, groups); + params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf); + plan = planner2.makePlan(params); + var job2 = getOnlyElement(plan.getJobs()); + assertEquals(createCFs(3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05), job2.getFiles()); } // Test to ensure that plugin falls back from TABLE_FILE_MAX to TSERV_SCAN_MAX_OPENFILES @@ -665,7 +684,7 @@ public class RatioBasedCompactionPlannerTest { var params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf); var plan = planner.makePlan(params); var job = getOnlyElement(plan.getJobs()); - assertEquals(createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4), job.getFiles()); + assertEquals(createCFs(1000, 1.9), job.getFiles()); } private CompactionJob createJob(CompactionKind kind, Set<CompactableFile> all, @@ -831,13 +850,19 @@ public class RatioBasedCompactionPlannerTest { } private static CompactionPlanner.InitParameters getInitParams(Configuration conf, String groups) { - String maxOpen = conf.get(prefix + "cs1.planner.opts.maxOpen"); + Map<String,String> options = new HashMap<>(); + + var optsPrefix = prefix + "cs1.planner.opts."; + + conf.forEach(e -> { + if (e.getKey().startsWith(optsPrefix)) { + options.put(e.getKey().substring(optsPrefix.length()), e.getValue()); + } + }); options.put("groups", groups.replaceAll("'", "\"")); - if (maxOpen != null) { - options.put("maxOpen", maxOpen); - } else { + if (!options.containsKey("maxOpen")) { options.put("maxOpen", "15"); }