This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new bf2b89e1ec Fixed compaction planning search bug (#5620)
bf2b89e1ec is described below

commit bf2b89e1ecdef9de5ef4716e601d42c7286900b1
Author: Keith Turner <[email protected]>
AuthorDate: Mon Jun 9 15:59:29 2025 -0400

    Fixed compaction planning search bug (#5620)
    
    When a tablet has too many files and it is not compacting with the
    configured compaction ratio, the planner would search for the highest
    ratio that would cause a compaction.  A problem was encountered with
    this search where two constraints in the code could cause nothing to
    be found.  One constraint was a check that rejected any compaction
    where `filesToCompact.size() < goalCompactionSize`.  The other
    constraint was that `DefaultCompactionPlanner.findDataFilesToCompact()`
    would find the largest set of small files to compact.  Putting these
    two together, in some cases `findDataFilesToCompact()` would keep
    finding a set of files smaller than `goalCompactionSize` and log a
    warning that it could not find anything.  However, the set it did
    find would have been good to compact.
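
    As a rough illustration of how the two constraints could conflict
    (hypothetical numbers, not taken from an actual tablet): with 20
    candidate files and table.file.max set to 15, the old code computed
    goalCompactionSize = 20 - 15 + 1 = 6.  If every ratio tried by the
    binary search caused `findDataFilesToCompact()` to return a set of
    fewer than 6 files, the search rejected each result and ended with
    the warning, even though compacting that smaller set would still
    have made progress.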
    
    Looking into fixing this led to the conclusion that the behavior of
    `findDataFilesToCompact()`, where it finds the largest set of small
    files to compact, is probably not optimal when varying the compaction
    ratio.  Also, it is probably best not to set a minimum number of files
    to compact; it is ok if it takes multiple compactions to resolve the
    issue.  Changed the code to find the files with the highest ratio and
    dropped those two other constraints.
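
    The core of the new search, sketched as a standalone method (the
    name `findHighestRatioSet` and the use of raw sizes are illustrative
    only; the real implementation in the diff below works on
    CompactableFile and sortAndLimitByMaxSize):

        // Candidate sizes sorted ascending, already limited by
        // maxSizeToCompact; assumes every size is > 0.
        static List<Long> findHighestRatioSet(List<Long> sortedSizes,
            int maxFilesToCompact, double lowestRatio) {
          List<Long> best = List.of();
          double bestRatio = Double.MIN_VALUE;
          int windowStart = 0;
          int windowEnd = Math.min(sortedSizes.size(), maxFilesToCompact);
          while (windowEnd <= sortedSizes.size() && windowStart < windowEnd) {
            long sum = sortedSizes.get(windowStart);
            for (int i = windowStart + 1; i < windowEnd; i++) {
              long size = sortedSizes.get(i);
              sum += size;
              // ratio needed to compact this prefix: total / largest
              double needed = sum / (double) size;
              if (needed >= bestRatio) {
                bestRatio = needed;
                best = sortedSizes.subList(windowStart, i + 1);
              }
            }
            windowStart++;
            windowEnd++;
          }
          // nothing is planned unless the best ratio clears the floor
          return bestRatio > lowestRatio ? best : List.of();
        }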
    
    Added a test that causes the two constraints to conflict; this new
    test will not pass with the old code.  Adjusted some of the existing
    tests because of the change in behavior.
    
    Added a new config option to limit the minimum compaction ratio of the
    search.  Defaulted this to 1.1, which may be extreme.  Files that each
    differ in size by 10x will have a ratio of roughly 1.1.  This is
    roughly the minimum ratio that the old code would search.
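
    As a worked example of that default (hypothetical sizes): for two
    files of sizes 1 and 10, compacting both needs a ratio of
    (1 + 10) / 10 = 1.1, and for sizes 1, 10, and 100 the full set needs
    (1 + 10 + 100) / 100 = 1.11, so sets of files that each differ by
    about 10x sit right at the 1.1 floor.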
    
    
    Co-authored-by: Dave Marion <[email protected]>
---
 .../spi/compaction/DefaultCompactionPlanner.java   | 98 ++++++++++++----------
 .../compaction/DefaultCompactionPlannerTest.java   | 55 ++++++++----
 2 files changed, 90 insertions(+), 63 deletions(-)

diff --git a/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java b/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java
index a0d8a7872f..e0985329eb 100644
--- a/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java
+++ b/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java
@@ -124,8 +124,9 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
  * </ol>
  * For example, given a tablet with 20 files, and table.file.max is 15 and no compactions are
  * planned. If the compaction ratio is set to 3, then this plugin will find the largest compaction
- * ratio less than 3 that results in a compaction.
- *
+ * ratio less than 3 that results in a compaction. The lowest compaction ratio that will be
+ * considered in this search defaults to 1.1. Starting in 2.1.4, the lower bound for the search can
+ * be set using {@code tserver.compaction.major.service.<service>.opts.lowestRatio}
  *
  * @since 2.1.0
  * @see org.apache.accumulo.core.spi.compaction
@@ -165,6 +166,7 @@ public class DefaultCompactionPlanner implements CompactionPlanner {
 
   private List<Executor> executors;
   private int maxFilesToCompact;
+  private double lowestRatio;
 
   @SuppressFBWarnings(value = {"UWF_UNWRITTEN_FIELD", "NP_UNWRITTEN_FIELD"},
       justification = "Field is written by Gson")
@@ -229,6 +231,10 @@ public class DefaultCompactionPlanner implements CompactionPlanner {
               "Duplicate maxSize set in executors. " + params.getOptions().get("executors"));
         }
       });
+
+      lowestRatio = Double.parseDouble(params.getOptions().getOrDefault("lowestRatio", "1.1"));
+      Preconditions.checkArgument(lowestRatio >= 1.0, "lowestRatio must be >= 1.0 not %s",
+          lowestRatio);
     } else {
       throw new IllegalStateException("No defined executors for this planner");
     }
@@ -368,52 +374,54 @@ public class DefaultCompactionPlanner implements CompactionPlanner {
    */
   private Collection<CompactableFile> findFilesToCompactWithLowerRatio(PlanningParameters params,
       long maxSizeToCompact, int maxTabletFiles) {
-    double lowRatio = 1.0;
-    double highRatio = params.getRatio();
-
-    Preconditions.checkArgument(highRatio >= lowRatio);
 
     var candidates = Set.copyOf(params.getCandidates());
-    Collection<CompactableFile> found = Set.of();
-
-    int goalCompactionSize = candidates.size() - maxTabletFiles + 1;
-    if (goalCompactionSize > maxFilesToCompact) {
-      // The tablet is way over max tablet files, so multiple compactions will be needed. Therefore,
-      // do not set a goal size for this compaction and find the largest compaction ratio that will
-      // compact some set of files.
-      goalCompactionSize = 0;
-    }
-
-    // Do a binary search of the compaction ratios.
-    while (highRatio - lowRatio > .1) {
-      double ratioToCheck = (highRatio - lowRatio) / 2 + lowRatio;
-
-      // This is continually resorting the list of files in the following call, could optimize this
-      var filesToCompact =
-          findDataFilesToCompact(candidates, ratioToCheck, maxFilesToCompact, maxSizeToCompact);
-
-      log.trace("Tried ratio {} and found {} {} {}", ratioToCheck, 
filesToCompact,
-          filesToCompact.size() >= goalCompactionSize, goalCompactionSize);
+    List<CompactableFile> sortedFiles = sortAndLimitByMaxSize(candidates, 
maxSizeToCompact);
+
+    List<CompactableFile> found = List.of();
+    double largestRatioSeen = Double.MIN_VALUE;
+
+    if (sortedFiles.size() > 1) {
+      int windowStart = 0;
+      int windowEnd = Math.min(sortedFiles.size(), maxFilesToCompact);
+
+      while (windowEnd <= sortedFiles.size()) {
+        var filesInWindow = sortedFiles.subList(windowStart, windowEnd);
+
+        long sum = filesInWindow.get(0).getEstimatedSize();
+        for (int i = 1; i < filesInWindow.size(); i++) {
+          long size = filesInWindow.get(i).getEstimatedSize();
+          sum += size;
+          if (size > 0) {
+            // This is the compaction ratio needed to compact these files
+            double neededCompactionRatio = sum / (double) size;
+            log.trace("neededCompactionRatio:{} files:{}", 
neededCompactionRatio,
+                filesInWindow.subList(0, i + 1));
+            if (neededCompactionRatio >= largestRatioSeen) {
+              largestRatioSeen = neededCompactionRatio;
+              found = filesInWindow.subList(0, i + 1);
+            }
+          } else {
+            log.warn("Unexpected size seen for file {} {} {}", 
params.getTabletId(),
+                filesInWindow.get(i).getFileName(), size);
+          }
+        }
 
-      if (filesToCompact.isEmpty() || filesToCompact.size() < goalCompactionSize) {
-        highRatio = ratioToCheck;
-      } else {
-        lowRatio = ratioToCheck;
-        found = filesToCompact;
+        windowStart++;
+        windowEnd++;
       }
-    }
+    } // else all of the files are too large
 
-    if (found.isEmpty() && lowRatio == 1.0) {
+    if (found.isEmpty() || largestRatioSeen <= lowestRatio) {
       var examinedFiles = sortAndLimitByMaxSize(candidates, maxSizeToCompact);
       var excludedBecauseMaxSize = candidates.size() - examinedFiles.size();
       var tabletId = params.getTabletId();
 
-      log.warn(
-          "Unable to plan compaction for {} that has too many files. {}:{} 
num_files:{} "
-              + "excluded_large_files:{} max_compaction_size:{} 
ratio_search_range:{},{} ",
+      log.warn("Unable to plan compaction for {} that has too many files. 
{}:{} num_files:{} "
+          + "excluded_large_files:{} max_compaction_size:{} ratio:{} 
largestRatioSeen:{} lowestRatio:{}",
           tabletId, Property.TABLE_FILE_MAX.getKey(), maxTabletFiles, 
candidates.size(),
-          excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact), 
highRatio,
-          params.getRatio());
+          excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact), 
params.getRatio(),
+          largestRatioSeen, lowestRatio);
       if (log.isDebugEnabled()) {
         var sizesOfExamined = examinedFiles.stream()
             .map(compactableFile -> NumUtil.bigNumberForSize(compactableFile.getEstimatedSize()))
@@ -426,14 +434,14 @@ public class DefaultCompactionPlanner implements CompactionPlanner {
         log.debug("Failed planning details for {} examined_file_sizes:{} excluded_file_sizes:{}",
             tabletId, sizesOfExamined, sizesOfExcluded);
       }
+      found = List.of();
+    } else {
+      log.info(
+          "For {} found {} files to compact lowering compaction ratio from {} 
to {} because the tablet "
+              + "exceeded {} files, it had {}",
+          params.getTabletId(), found.size(), params.getRatio(), 
largestRatioSeen, maxTabletFiles,
+          params.getCandidates().size());
     }
-
-    log.info(
-        "For {} found {} files to compact lowering compaction ratio from {} to 
{} because the tablet "
-            + "exceeded {} files, it had {}",
-        params.getTabletId(), found.size(), params.getRatio(), lowRatio, 
maxTabletFiles,
-        params.getCandidates().size());
-
     return found;
   }
 
diff --git a/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java b/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java
index 4e5f1c4d86..b2cf3287f5 100644
--- a/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java
+++ b/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java
@@ -525,13 +525,14 @@ public class DefaultCompactionPlannerTest {
     overrides.put(Property.TABLE_FILE_MAX.getKey(), "7");
     var conf = new ConfigurationImpl(SiteConfiguration.empty().withOverrides(overrides).build());
 
-    // For this case need to compact three files and the highest ratio that achieves that is 1.8
+    // The highest ratio is 1.9 so should compact this. Will need a subsequent compaction to bring
+    // it below the limit.
     var planner = createPlanner(conf, executors);
     var all = createCFs(1000, 1.1, 1.9, 1.8, 1.6, 1.3, 1.4, 1.3, 1.2, 1.1);
     var params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf);
     var plan = planner.makePlan(params);
     var job = getOnlyElement(plan.getJobs());
-    assertEquals(createCFs(1000, 1.1, 1.9, 1.8), job.getFiles());
+    assertEquals(createCFs(1000, 1.1, 1.9), job.getFiles());
 
     // For this case need to compact two files and the highest ratio that achieves that is 2.9
     all = createCFs(1000, 2, 2.9, 2.8, 2.7, 2.6, 2.5, 2.4, 2.3);
@@ -564,21 +565,26 @@ public class DefaultCompactionPlannerTest {
       assertEquals(createCFs(1000, 1.9), job.getFiles());
     }
 
-    // In this case the tablet can be brought below the max limit in single compaction, so it should
-    // find this
+    // The max compaction ratio is the first two files, so should compact these. Will require
+    // multiple compactions to bring the tablet below the limit.
     all =
         createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.5, 1.2, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1);
     params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf);
     plan = planner.makePlan(params);
     job = getOnlyElement(plan.getJobs());
-    assertEquals(createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.5, 1.2, 1.1), job.getFiles());
+    assertEquals(createCFs(1000, 1.9), job.getFiles());
+
+    all = createCFs(10, 1.3, 2.2, 2.51, 1.02, 1.7, 2.54, 2.3, 1.7, 1.5, 1.4);
+    params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf);
+    plan = planner.makePlan(params);
+    job = getOnlyElement(plan.getJobs());
+    assertEquals(createCFs(10, 1.3, 2.2, 2.51, 1.02, 1.7, 2.54), job.getFiles());
 
     // each file is 10x the size of the file smaller than it
     all = createCFs(10, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1);
     params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf);
     plan = planner.makePlan(params);
-    job = getOnlyElement(plan.getJobs());
-    assertEquals(createCFs(10, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1), job.getFiles());
+    assertTrue(plan.getJobs().isEmpty());
 
     // test with some files growing 20x, ensure those are not included
     for (var ratio : List.of(1.9, 2.0, 3.0, 4.0)) {
@@ -588,7 +594,6 @@ public class DefaultCompactionPlannerTest {
       job = getOnlyElement(plan.getJobs());
       assertEquals(createCFs(10, 1.05, 1.05, 1.25, 1.75), job.getFiles());
     }
-
   }
 
   @Test
@@ -621,12 +626,24 @@ public class DefaultCompactionPlannerTest {
 
     assertTrue(plan.getJobs().isEmpty());
 
-    // a really bad situation, each file is 20 times the size of its smaller file. The algorithm
-    // does not search that for ratios that low.
-    all = createCFs(10, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05);
+    // a really bad situation, each file is 20 times the size of its smaller file. By default, the
+    // algorithm does not search that for ratios that low.
+    all = createCFs(3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05);
     params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf);
     plan = planner.makePlan(params);
     assertTrue(plan.getJobs().isEmpty());
+
+    // adjust the config for the lowest search ratio and recreate the planner, this should allow a
+    // compaction to happen
+    var overrides2 = new HashMap<>(overrides);
+    overrides2.put(
+        Property.TSERV_COMPACTION_SERVICE_PREFIX.getKey() + "cs1.planner.opts.lowestRatio", "1.04");
+    var conf2 = new ConfigurationImpl(SiteConfiguration.empty().withOverrides(overrides2).build());
+    var planner2 = createPlanner(conf2, executors);
+    params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf);
+    plan = planner2.makePlan(params);
+    var job2 = getOnlyElement(plan.getJobs());
+    assertEquals(createCFs(3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05), job2.getFiles());
   }
 
   // Test to ensure that plugin falls back from TABLE_FILE_MAX to TSERV_SCAN_MAX_OPENFILES
@@ -648,7 +665,7 @@ public class DefaultCompactionPlannerTest {
     var params = createPlanningParams(all, all, Set.of(), 3, CompactionKind.SYSTEM, conf);
     var plan = planner.makePlan(params);
     var job = getOnlyElement(plan.getJobs());
-    assertEquals(createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4), job.getFiles());
+    assertEquals(createCFs(1000, 1.9), job.getFiles());
   }
 
   private CompactionJob createJob(CompactionKind kind, Set<CompactableFile> all,
@@ -811,14 +828,16 @@ public class DefaultCompactionPlannerTest {
   private static CompactionPlanner.InitParameters getInitParams(Configuration conf,
       String executors) {
 
-    String maxOpen =
-        conf.get(Property.TSERV_COMPACTION_SERVICE_PREFIX.getKey() + "cs1.planner.opts.maxOpen");
     Map<String,String> options = new HashMap<>();
-    options.put("executors", executors.replaceAll("'", "\""));
 
-    if (maxOpen != null) {
-      options.put("maxOpen", maxOpen);
-    }
+    String prefix = Property.TSERV_COMPACTION_SERVICE_PREFIX.getKey() + "cs1.planner.opts.";
+
+    conf.forEach(e -> {
+      if (e.getKey().startsWith(prefix)) {
+        options.put(e.getKey().substring(prefix.length()), e.getValue());
+      }
+    });
+    options.put("executors", executors.replaceAll("'", "\""));
 
     ServiceEnvironment senv = EasyMock.createMock(ServiceEnvironment.class);
     EasyMock.expect(senv.getConfiguration()).andReturn(conf).anyTimes();
