This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 22c3e23695 Fixed compaction planning search bug (#5620)
22c3e23695 is described below

commit 22c3e23695d3c4c6c8b8812b69f877df161c8615
Author: Keith Turner <ktur...@apache.org>
AuthorDate: Mon Jun 9 15:59:29 2025 -0400

    Fixed compaction planning search bug (#5620)
    
    Pulled changes from bf2b89e1ec #5620 in 2.1 into main and resolved
    conflicts.  This partially resolves #5634
---
 .../compaction/RatioBasedCompactionPlanner.java    | 99 ++++++++++++----------
 .../RatioBasedCompactionPlannerTest.java           | 57 +++++++++----
 2 files changed, 95 insertions(+), 61 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java
 
b/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java
index 0bfdd77589..d6ce07c4f8 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlanner.java
@@ -118,9 +118,11 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
  * </ol>
  * For example, given a tablet with 20 files, and table.file.max is 15 and no 
compactions are
  * planned. If the compaction ratio is set to 3, then this plugin will find 
the largest compaction
- * ratio less than 3 that results in a compaction.
+ * ratio less than 3 that results in a compaction. The lowest compaction ratio 
that will be
+ * considered in this search defaults to 1.1. Starting in 2.1.4, the lower 
bound for the search can
+ * be set using {@code 
tserver.compaction.major.service.<service>.opts.lowestRatio}
  *
- * @since 3.1.0
+ * @since 4.0.0
  * @see org.apache.accumulo.core.spi.compaction
  */
 
@@ -171,6 +173,7 @@ public class RatioBasedCompactionPlanner implements 
CompactionPlanner {
 
   private List<CompactionGroup> groups;
   private int maxFilesToCompact;
+  private double lowestRatio;
 
   @SuppressFBWarnings(value = {"UWF_UNWRITTEN_FIELD", "NP_UNWRITTEN_FIELD"},
       justification = "Field is written by Gson")
@@ -223,6 +226,10 @@ public class RatioBasedCompactionPlanner implements 
CompactionPlanner {
       }
     });
 
+    lowestRatio = 
Double.parseDouble(params.getOptions().getOrDefault("lowestRatio", "1.1"));
+    Preconditions.checkArgument(lowestRatio >= 1.0, "lowestRatio must be >= 
1.0 not %s",
+        lowestRatio);
+
     determineMaxFilesToCompact(params);
   }
 
@@ -379,52 +386,54 @@ public class RatioBasedCompactionPlanner implements 
CompactionPlanner {
    */
   private List<Collection<CompactableFile>> findFilesToCompactWithLowerRatio(
       PlanningParameters params, long maxSizeToCompact, int maxTabletFiles) {
-    double lowRatio = 1.0;
-    double highRatio = params.getRatio();
-
-    Preconditions.checkArgument(highRatio >= lowRatio);
 
     var candidates = Set.copyOf(params.getCandidates());
-    Collection<CompactableFile> found = Set.of();
-
-    int goalCompactionSize = candidates.size() - maxTabletFiles + 1;
-    if (goalCompactionSize > maxFilesToCompact) {
-      // The tablet is way over max tablet files, so multiple compactions will 
be needed. Therefore,
-      // do not set a goal size for this compaction and find the largest 
compaction ratio that will
-      // compact some set of files.
-      goalCompactionSize = 0;
-    }
-
-    // Do a binary search of the compaction ratios.
-    while (highRatio - lowRatio > .1) {
-      double ratioToCheck = (highRatio - lowRatio) / 2 + lowRatio;
-
-      // This is continually resorting the list of files in the following 
call, could optimize this
-      var filesToCompact =
-          findDataFilesToCompact(candidates, ratioToCheck, maxFilesToCompact, 
maxSizeToCompact);
-
-      log.trace("Tried ratio {} and found {} {} {} {}", ratioToCheck, 
filesToCompact,
-          filesToCompact.size() >= goalCompactionSize, goalCompactionSize, 
maxFilesToCompact);
+    List<CompactableFile> sortedFiles = sortAndLimitByMaxSize(candidates, 
maxSizeToCompact);
+
+    List<CompactableFile> found = List.of();
+    double largestRatioSeen = Double.MIN_VALUE;
+
+    if (sortedFiles.size() > 1) {
+      int windowStart = 0;
+      int windowEnd = Math.min(sortedFiles.size(), maxFilesToCompact);
+
+      while (windowEnd <= sortedFiles.size()) {
+        var filesInWindow = sortedFiles.subList(windowStart, windowEnd);
+
+        long sum = filesInWindow.get(0).getEstimatedSize();
+        for (int i = 1; i < filesInWindow.size(); i++) {
+          long size = filesInWindow.get(i).getEstimatedSize();
+          sum += size;
+          if (size > 0) {
+            // This is the compaction ratio needed to compact these files
+            double neededCompactionRatio = sum / (double) size;
+            log.trace("neededCompactionRatio:{} files:{}", 
neededCompactionRatio,
+                filesInWindow.subList(0, i + 1));
+            if (neededCompactionRatio >= largestRatioSeen) {
+              largestRatioSeen = neededCompactionRatio;
+              found = filesInWindow.subList(0, i + 1);
+            }
+          } else {
+            log.warn("Unexpected size seen for file {} {} {}", 
params.getTabletId(),
+                filesInWindow.get(i).getFileName(), size);
+          }
+        }
 
-      if (filesToCompact.isEmpty() || filesToCompact.size() < 
goalCompactionSize) {
-        highRatio = ratioToCheck;
-      } else {
-        lowRatio = ratioToCheck;
-        found = filesToCompact;
+        windowStart++;
+        windowEnd++;
       }
-    }
+    } // else all of the files are too large
 
-    if (found.isEmpty() && lowRatio == 1.0) {
+    if (found.isEmpty() || largestRatioSeen <= lowestRatio) {
       var examinedFiles = sortAndLimitByMaxSize(candidates, maxSizeToCompact);
       var excludedBecauseMaxSize = candidates.size() - examinedFiles.size();
       var tabletId = params.getTabletId();
 
-      log.warn(
-          "Unable to plan compaction for {} that has too many files. {}:{} 
num_files:{} "
-              + "excluded_large_files:{} max_compaction_size:{} 
ratio_search_range:{},{} ",
+      log.warn("Unable to plan compaction for {} that has too many files. 
{}:{} num_files:{} "
+          + "excluded_large_files:{} max_compaction_size:{} ratio:{} 
largestRatioSeen:{} lowestRatio:{}",
           tabletId, Property.TABLE_FILE_MAX.getKey(), maxTabletFiles, 
candidates.size(),
-          excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact), 
highRatio,
-          params.getRatio());
+          excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact), 
params.getRatio(),
+          largestRatioSeen, lowestRatio);
       if (log.isDebugEnabled()) {
         var sizesOfExamined = examinedFiles.stream()
             .map(compactableFile -> 
NumUtil.bigNumberForSize(compactableFile.getEstimatedSize()))
@@ -437,14 +446,14 @@ public class RatioBasedCompactionPlanner implements 
CompactionPlanner {
         log.debug("Failed planning details for {} examined_file_sizes:{} 
excluded_file_sizes:{}",
             tabletId, sizesOfExamined, sizesOfExcluded);
       }
+      found = List.of();
+    } else {
+      log.info(
+          "For {} found {} files to compact lowering compaction ratio from {} 
to {} because the tablet "
+              + "exceeded {} files, it had {}",
+          params.getTabletId(), found.size(), params.getRatio(), 
largestRatioSeen, maxTabletFiles,
+          params.getCandidates().size());
     }
-
-    log.info(
-        "For {} found {} files to compact lowering compaction ratio from {} to 
{} because the tablet "
-            + "exceeded {} files, it had {}",
-        params.getTabletId(), found.size(), params.getRatio(), lowRatio, 
maxTabletFiles,
-        params.getCandidates().size());
-
     if (found.isEmpty()) {
       return List.of();
     } else {
diff --git 
a/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java
 
b/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java
index 7484602e92..5c18325a34 100644
--- 
a/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java
+++ 
b/core/src/test/java/org/apache/accumulo/core/spi/compaction/RatioBasedCompactionPlannerTest.java
@@ -42,6 +42,7 @@ import 
org.apache.accumulo.core.client.admin.compaction.CompactableFile;
 import org.apache.accumulo.core.clientImpl.Namespace;
 import org.apache.accumulo.core.conf.ConfigurationTypeHelper;
 import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.conf.SiteConfiguration;
 import org.apache.accumulo.core.data.NamespaceId;
 import org.apache.accumulo.core.data.TableId;
 import org.apache.accumulo.core.data.TabletId;
@@ -51,6 +52,7 @@ import org.apache.accumulo.core.spi.common.ServiceEnvironment;
 import org.apache.accumulo.core.spi.common.ServiceEnvironment.Configuration;
 import org.apache.accumulo.core.spi.compaction.CompactionPlan.Builder;
 import 
org.apache.accumulo.core.spi.compaction.CompactionPlanner.InitParameters;
+import org.apache.accumulo.core.util.ConfigurationImpl;
 import org.apache.accumulo.core.util.compaction.CompactionJobImpl;
 import org.apache.accumulo.core.util.compaction.CompactionJobPrioritizer;
 import org.apache.accumulo.core.util.compaction.CompactionPlanImpl;
@@ -546,13 +548,14 @@ public class RatioBasedCompactionPlannerTest {
     overrides.put(Property.TABLE_FILE_MAX.getKey(), "7");
     var conf = ServiceEnvironment.Configuration.from(overrides, false);
 
-    // For this case need to compact three files and the highest ratio that 
achieves that is 1.8
+    // The highest ratio is 1.9 so should compact this. Will need a subsequent 
compaction to bring
+    // it below the limit.
     var planner = createPlanner(conf, groups);
     var all = createCFs(1000, 1.1, 1.9, 1.8, 1.6, 1.3, 1.4, 1.3, 1.2, 1.1);
     var params = createPlanningParams(all, all, Set.of(), 3, 
CompactionKind.SYSTEM, conf);
     var plan = planner.makePlan(params);
     var job = getOnlyElement(plan.getJobs());
-    assertEquals(createCFs(1000, 1.1, 1.9, 1.8), job.getFiles());
+    assertEquals(createCFs(1000, 1.1, 1.9), job.getFiles());
 
     // For this case need to compact two files and the highest ratio that 
achieves that is 2.9
     all = createCFs(1000, 2, 2.9, 2.8, 2.7, 2.6, 2.5, 2.4, 2.3);
@@ -585,21 +588,26 @@ public class RatioBasedCompactionPlannerTest {
       assertEquals(createCFs(1000, 1.9), job.getFiles());
     }
 
-    // In this case the tablet can be brought below the max limit in single 
compaction, so it should
-    // find this
+    // The max compaction ratio is the first two files, so should compact 
these. Will require
+    // multiple compactions to bring the tablet below the limit.
     all =
         createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.5, 1.2, 1.1, 1.1, 1.1, 
1.1, 1.1, 1.1, 1.1);
     params = createPlanningParams(all, all, Set.of(), 3, 
CompactionKind.SYSTEM, conf);
     plan = planner.makePlan(params);
     job = getOnlyElement(plan.getJobs());
-    assertEquals(createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4, 1.5, 1.2, 1.1), 
job.getFiles());
+    assertEquals(createCFs(1000, 1.9), job.getFiles());
+
+    all = createCFs(10, 1.3, 2.2, 2.51, 1.02, 1.7, 2.54, 2.3, 1.7, 1.5, 1.4);
+    params = createPlanningParams(all, all, Set.of(), 3, 
CompactionKind.SYSTEM, conf);
+    plan = planner.makePlan(params);
+    job = getOnlyElement(plan.getJobs());
+    assertEquals(createCFs(10, 1.3, 2.2, 2.51, 1.02, 1.7, 2.54), 
job.getFiles());
 
     // each file is 10x the size of the file smaller than it
     all = createCFs(10, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1);
     params = createPlanningParams(all, all, Set.of(), 3, 
CompactionKind.SYSTEM, conf);
     plan = planner.makePlan(params);
-    job = getOnlyElement(plan.getJobs());
-    assertEquals(createCFs(10, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1), 
job.getFiles());
+    assertTrue(plan.getJobs().isEmpty());
 
     // test with some files growing 20x, ensure those are not included
     for (var ratio : List.of(1.9, 2.0, 3.0, 4.0)) {
@@ -609,7 +617,6 @@ public class RatioBasedCompactionPlannerTest {
       job = getOnlyElement(plan.getJobs());
       assertEquals(createCFs(10, 1.05, 1.05, 1.25, 1.75), job.getFiles());
     }
-
   }
 
   @Test
@@ -640,12 +647,24 @@ public class RatioBasedCompactionPlannerTest {
 
     assertTrue(plan.getJobs().isEmpty());
 
-    // a really bad situation, each file is 20 times the size of its smaller 
file. The algorithm
-    // does not search that for ratios that low.
-    all = createCFs(10, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05);
+    // a really bad situation, each file is 20 times the size of its smaller 
file. By default, the
+    // algorithm does not search that for ratios that low.
+    all = createCFs(3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05);
     params = createPlanningParams(all, all, Set.of(), 3, 
CompactionKind.SYSTEM, conf);
     plan = planner.makePlan(params);
     assertTrue(plan.getJobs().isEmpty());
+
+    // adjust the config for the lowest search ratio and recreate the planner, 
this should allow a
+    // compaction to happen
+    var overrides2 = new HashMap<>(overrides);
+    overrides2.put(Property.COMPACTION_SERVICE_PREFIX.getKey() + 
"cs1.planner.opts.lowestRatio",
+        "1.04");
+    var conf2 = new 
ConfigurationImpl(SiteConfiguration.empty().withOverrides(overrides2).build());
+    var planner2 = createPlanner(conf2, groups);
+    params = createPlanningParams(all, all, Set.of(), 3, 
CompactionKind.SYSTEM, conf);
+    plan = planner2.makePlan(params);
+    var job2 = getOnlyElement(plan.getJobs());
+    assertEquals(createCFs(3, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05), 
job2.getFiles());
   }
 
   // Test to ensure that plugin falls back from TABLE_FILE_MAX to 
TSERV_SCAN_MAX_OPENFILES
@@ -665,7 +684,7 @@ public class RatioBasedCompactionPlannerTest {
     var params = createPlanningParams(all, all, Set.of(), 3, 
CompactionKind.SYSTEM, conf);
     var plan = planner.makePlan(params);
     var job = getOnlyElement(plan.getJobs());
-    assertEquals(createCFs(1000, 1.9, 1.8, 1.7, 1.6, 1.5, 1.4), 
job.getFiles());
+    assertEquals(createCFs(1000, 1.9), job.getFiles());
   }
 
   private CompactionJob createJob(CompactionKind kind, Set<CompactableFile> 
all,
@@ -831,13 +850,19 @@ public class RatioBasedCompactionPlannerTest {
   }
 
   private static CompactionPlanner.InitParameters getInitParams(Configuration 
conf, String groups) {
-    String maxOpen = conf.get(prefix + "cs1.planner.opts.maxOpen");
+
     Map<String,String> options = new HashMap<>();
+
+    var optsPrefix = prefix + "cs1.planner.opts.";
+
+    conf.forEach(e -> {
+      if (e.getKey().startsWith(optsPrefix)) {
+        options.put(e.getKey().substring(optsPrefix.length()), e.getValue());
+      }
+    });
     options.put("groups", groups.replaceAll("'", "\""));
 
-    if (maxOpen != null) {
-      options.put("maxOpen", maxOpen);
-    } else {
+    if (!options.containsKey("maxOpen")) {
       options.put("maxOpen", "15");
     }
 

Reply via email to