This is an automated email from the ASF dual-hosted git repository.

ddanielr pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push: new 04ded6ecd1 Log more information about why compaction can not be planned (#5532) 04ded6ecd1 is described below commit 04ded6ecd1304b605b6973bef128ec42c71f9986 Author: Keith Turner <ktur...@apache.org> AuthorDate: Tue May 20 14:54:57 2025 -0400 Log more information about why compaction can not be planned (#5532) * Log more information about why compaction can not be planned For the case when a tablet has more than tablet.file.max files and a compaction can not be planned, log more information about what went into the planning process. * fix javadoc --- .../core/spi/compaction/CompactionPlanner.java | 7 +++ .../spi/compaction/DefaultCompactionPlanner.java | 65 ++++++++++++++++------ .../compaction/DefaultCompactionPlannerTest.java | 8 +++ .../org/apache/accumulo/core/util/NumUtilTest.java | 1 - .../tserver/compactions/CompactionService.java | 7 +++ 5 files changed, 71 insertions(+), 17 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/spi/compaction/CompactionPlanner.java b/core/src/main/java/org/apache/accumulo/core/spi/compaction/CompactionPlanner.java index 9f4946b357..7b401e49c5 100644 --- a/core/src/main/java/org/apache/accumulo/core/spi/compaction/CompactionPlanner.java +++ b/core/src/main/java/org/apache/accumulo/core/spi/compaction/CompactionPlanner.java @@ -26,6 +26,7 @@ import org.apache.accumulo.core.client.admin.CompactionConfig; import org.apache.accumulo.core.client.admin.compaction.CompactableFile; import org.apache.accumulo.core.data.NamespaceId; import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.data.TabletId; import org.apache.accumulo.core.spi.common.ServiceEnvironment; /** @@ -94,6 +95,12 @@ public interface CompactionPlanner { */ TableId getTableId(); + /** + * @return the tablet for which a compaction is being planned + * @since 2.1.4 + */ + TabletId getTabletId(); + ServiceEnvironment getServiceEnvironment(); CompactionKind 
getKind(); diff --git a/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java b/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java index 6c60d5095c..a0d8a7872f 100644 --- a/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java +++ b/core/src/main/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlanner.java @@ -28,12 +28,14 @@ import java.util.HashSet; import java.util.List; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; import org.apache.accumulo.core.client.TableNotFoundException; import org.apache.accumulo.core.client.admin.compaction.CompactableFile; import org.apache.accumulo.core.conf.ConfigurationTypeHelper; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.spi.common.ServiceEnvironment; +import org.apache.accumulo.core.util.NumUtil; import org.apache.accumulo.core.util.compaction.CompactionJobPrioritizer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -402,18 +404,34 @@ public class DefaultCompactionPlanner implements CompactionPlanner { } if (found.isEmpty() && lowRatio == 1.0) { - // in this case the data must be really skewed, operator intervention may be needed. + var examinedFiles = sortAndLimitByMaxSize(candidates, maxSizeToCompact); + var excludedBecauseMaxSize = candidates.size() - examinedFiles.size(); + var tabletId = params.getTabletId(); + log.warn( - "Attempted to lower compaction ration from {} to {} for {} because there are {} files " - + "and the max tablet files is {}, however no set of files to compact were found.", - params.getRatio(), highRatio, params.getTableId(), params.getCandidates().size(), - maxTabletFiles); + "Unable to plan compaction for {} that has too many files. 
{}:{} num_files:{} " + + "excluded_large_files:{} max_compaction_size:{} ratio_search_range:{},{} ", + tabletId, Property.TABLE_FILE_MAX.getKey(), maxTabletFiles, candidates.size(), + excludedBecauseMaxSize, NumUtil.bigNumberForSize(maxSizeToCompact), highRatio, + params.getRatio()); + if (log.isDebugEnabled()) { + var sizesOfExamined = examinedFiles.stream() + .map(compactableFile -> NumUtil.bigNumberForSize(compactableFile.getEstimatedSize())) + .collect(Collectors.toList()); + HashSet<CompactableFile> excludedFiles = new HashSet<>(candidates); + examinedFiles.forEach(excludedFiles::remove); + var sizesOfExcluded = excludedFiles.stream() + .map(compactableFile -> NumUtil.bigNumberForSize(compactableFile.getEstimatedSize())) + .collect(Collectors.toList()); + log.debug("Failed planning details for {} examined_file_sizes:{} excluded_file_sizes:{}", + tabletId, sizesOfExamined, sizesOfExcluded); + } } log.info( "For {} found {} files to compact lowering compaction ratio from {} to {} because the tablet " + "exceeded {} files, it had {}", - params.getTableId(), found.size(), params.getRatio(), lowRatio, maxTabletFiles, + params.getTabletId(), found.size(), params.getRatio(), lowRatio, maxTabletFiles, params.getCandidates().size()); return found; @@ -482,15 +500,18 @@ public class DefaultCompactionPlanner implements CompactionPlanner { return sortedFiles.subList(0, numToCompact); } - static Collection<CompactableFile> findDataFilesToCompact(Set<CompactableFile> files, - double ratio, int maxFilesToCompact, long maxSizeToCompact) { - if (files.size() <= 1) { - return Collections.emptySet(); - } - + /** + * @return a list of the smallest files where the sum of the sizes is less than maxSizeToCompact + */ + static List<CompactableFile> sortAndLimitByMaxSize(Set<CompactableFile> files, + long maxSizeToCompact) { // sort files from smallest to largest. So position 0 has the smallest file. 
List<CompactableFile> sortedFiles = sortByFileSize(files); + if (maxSizeToCompact == Long.MAX_VALUE) { + return sortedFiles; + } + int maxSizeIndex = sortedFiles.size(); long sum = 0; for (int i = 0; i < sortedFiles.size(); i++) { @@ -502,10 +523,22 @@ public class DefaultCompactionPlanner implements CompactionPlanner { } if (maxSizeIndex < sortedFiles.size()) { - sortedFiles = sortedFiles.subList(0, maxSizeIndex); - if (sortedFiles.size() <= 1) { - return Collections.emptySet(); - } + return sortedFiles.subList(0, maxSizeIndex); + } else { + return sortedFiles; + } + } + + static Collection<CompactableFile> findDataFilesToCompact(Set<CompactableFile> files, + double ratio, int maxFilesToCompact, long maxSizeToCompact) { + + if (files.size() <= 1) { + return Collections.emptySet(); + } + + List<CompactableFile> sortedFiles = sortAndLimitByMaxSize(files, maxSizeToCompact); + if (sortedFiles.size() <= 1) { + return Collections.emptySet(); } int windowStart = 0; diff --git a/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java b/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java index f3186701ad..4e5f1c4d86 100644 --- a/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java +++ b/core/src/test/java/org/apache/accumulo/core/spi/compaction/DefaultCompactionPlannerTest.java @@ -45,6 +45,9 @@ import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.conf.SiteConfiguration; import org.apache.accumulo.core.data.NamespaceId; import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.data.TabletId; +import org.apache.accumulo.core.dataImpl.KeyExtent; +import org.apache.accumulo.core.dataImpl.TabletIdImpl; import org.apache.accumulo.core.spi.common.ServiceEnvironment; import org.apache.accumulo.core.spi.common.ServiceEnvironment.Configuration; import org.apache.accumulo.core.spi.compaction.CompactionPlan.Builder; @@ 
-754,6 +757,11 @@ public class DefaultCompactionPlannerTest { return TableId.of("42"); } + @Override + public TabletId getTabletId() { + return new TabletIdImpl(new KeyExtent(getTableId(), null, null)); + } + @Override public ServiceEnvironment getServiceEnvironment() { ServiceEnvironment senv = EasyMock.createMock(ServiceEnvironment.class); diff --git a/core/src/test/java/org/apache/accumulo/core/util/NumUtilTest.java b/core/src/test/java/org/apache/accumulo/core/util/NumUtilTest.java index 0473fdcaa6..92526465ca 100644 --- a/core/src/test/java/org/apache/accumulo/core/util/NumUtilTest.java +++ b/core/src/test/java/org/apache/accumulo/core/util/NumUtilTest.java @@ -27,7 +27,6 @@ import java.util.Locale; import org.junit.jupiter.api.Test; public class NumUtilTest { - @Test public void testBigNumberForSize() { Locale.setDefault(Locale.US); diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/compactions/CompactionService.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/compactions/CompactionService.java index 49d5644795..2d60918253 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/compactions/CompactionService.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/compactions/CompactionService.java @@ -47,7 +47,9 @@ import org.apache.accumulo.core.conf.ConfigurationTypeHelper; import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.data.NamespaceId; import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.data.TabletId; import org.apache.accumulo.core.dataImpl.KeyExtent; +import org.apache.accumulo.core.dataImpl.TabletIdImpl; import org.apache.accumulo.core.spi.common.ServiceEnvironment; import org.apache.accumulo.core.spi.compaction.CompactionExecutorId; import org.apache.accumulo.core.spi.compaction.CompactionJob; @@ -247,6 +249,11 @@ public class CompactionService { return comp.getTableId(); } + @Override + public TabletId getTabletId() { + return new 
TabletIdImpl(comp.getExtent()); + } + @Override public ServiceEnvironment getServiceEnvironment() { return senv;