This is an automated email from the ASF dual-hosted git repository. ddanielr pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit 1a5368e9834aa3089948a133398036279f782d14 Merge: 66f49ef8c3 fd3e837033 Author: Daniel Roberts <ddani...@gmail.com> AuthorDate: Fri Jul 26 02:17:52 2024 +0000 Merge branch '2.1' .../accumulo/core/gc/ReferenceDirectory.java | 56 ---------------------- .../org/apache/accumulo/core/gc/ReferenceFile.java | 23 ++++++--- .../accumulo/server/gc/AllVolumesDirectory.java | 2 +- .../main/java/org/apache/accumulo/gc/GCRun.java | 3 +- .../accumulo/gc/GarbageCollectionAlgorithm.java | 6 +-- .../apache/accumulo/gc/GarbageCollectionTest.java | 3 +- 6 files changed, 21 insertions(+), 72 deletions(-) diff --cc core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java index 3044ec2f67,8a2b63cfcb..492e67c2e4 --- a/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java +++ b/core/src/main/java/org/apache/accumulo/core/gc/ReferenceFile.java @@@ -21,9 -21,7 +21,10 @@@ package org.apache.accumulo.core.gc import java.util.Objects; import org.apache.accumulo.core.data.TableId; +import org.apache.accumulo.core.metadata.ScanServerRefTabletFile; +import org.apache.accumulo.core.metadata.StoredTabletFile; + import org.apache.accumulo.core.metadata.schema.MetadataSchema; +import org.apache.hadoop.fs.Path; /** * A GC reference used for streaming and delete markers. This type is a file. Subclass is a @@@ -33,34 -31,30 +34,42 @@@ public class ReferenceFile implements R // parts of an absolute URI, like "hdfs://1.2.3.4/accumulo/tables/2a/t-0003" public final TableId tableId; // 2a public final boolean isScan; + public final boolean isDirectory; - // the exact string that is stored in the metadata - protected final String metadataEntry; + // the exact path from the file reference string that is stored in the metadata + protected final String metadataPath; - protected ReferenceFile(TableId tableId, String metadataPath, boolean isScan) { - protected ReferenceFile(TableId tableId, String metadataEntry, boolean isScan, ++ protected ReferenceFile(TableId tableId, String metadataPath, boolean isScan, + boolean isDirectory) { this.tableId = Objects.requireNonNull(tableId); - this.metadataEntry = Objects.requireNonNull(metadataEntry); + this.metadataPath = Objects.requireNonNull(metadataPath); this.isScan = isScan; + this.isDirectory = isDirectory; } - public static ReferenceFile forFile(TableId tableId, String metadataEntry) { - return new ReferenceFile(tableId, metadataEntry, false, false); + public static ReferenceFile forFile(TableId tableId, StoredTabletFile tabletFile) { - return new ReferenceFile(tableId, tabletFile.getMetadataPath(), false); ++ return new ReferenceFile(tableId, tabletFile.getMetadataPath(), false, false); } - public static ReferenceFile forScan(TableId tableId, String metadataEntry) { - return new ReferenceFile(tableId, metadataEntry, true, false); + public static ReferenceFile forFile(TableId tableId, Path metadataPathPath) { - return new ReferenceFile(tableId, metadataPathPath.toString(), false); ++ return new ReferenceFile(tableId, metadataPathPath.toString(), false, false); + } + + public static ReferenceFile forScan(TableId tableId, ScanServerRefTabletFile tabletFile) { - return new ReferenceFile(tableId, tabletFile.getNormalizedPathStr(), true); ++ return new ReferenceFile(tableId, tabletFile.getNormalizedPathStr(), true, false); + } + + public static ReferenceFile forScan(TableId tableId, StoredTabletFile tabletFile) { - return new ReferenceFile(tableId, tabletFile.getMetadataPath(), true); ++ return new ReferenceFile(tableId, tabletFile.getMetadataPath(), true, false); + } + + public static ReferenceFile forScan(TableId tableId, Path metadataPathPath) { - return new ReferenceFile(tableId, metadataPathPath.toString(), true); ++ return new ReferenceFile(tableId, metadataPathPath.toString(), true, false); + } + + public static ReferenceFile forDirectory(TableId tableId, String dirName) { + MetadataSchema.TabletsSection.ServerColumnFamily.validateDirCol(dirName); + return new ReferenceFile(tableId, dirName, false, true); } @Override diff --cc server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java index aff958b010,a4b328438f..4e85fc0a7b --- a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java @@@ -50,10 -51,9 +50,9 @@@ import org.apache.accumulo.core.data.Ta import org.apache.accumulo.core.fate.zookeeper.ZooReader; import org.apache.accumulo.core.gc.GcCandidate; import org.apache.accumulo.core.gc.Reference; - import org.apache.accumulo.core.gc.ReferenceDirectory; import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.manager.state.tables.TableState; -import org.apache.accumulo.core.metadata.MetadataTable; +import org.apache.accumulo.core.metadata.AccumuloTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.StoredTabletFile; import org.apache.accumulo.core.metadata.ValidationUtil; diff --cc server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java index eb6b8fe281,419d10728f..0c2a23b108 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java @@@ -143,57 -146,54 +142,56 @@@ public class GarbageCollectionAlgorith List<GcCandidate> candidateEntriesToBeDeleted = new ArrayList<>(); Set<TableId> tableIdsBefore = gce.getCandidateTableIDs(); Set<TableId> tableIdsSeen = new HashSet<>(); - Iterator<Reference> iter = gce.getReferences().iterator(); - while (iter.hasNext()) { - Reference ref = iter.next(); - tableIdsSeen.add(ref.getTableId()); + try (Stream<Reference> references = gce.getReferences()) { + references.forEach(ref -> { + tableIdsSeen.add(ref.getTableId()); - if (ref.isDirectory()) { - ServerColumnFamily.validateDirCol(ref.getMetadataEntry()); + if (ref.isDirectory()) { - var dirReference = (ReferenceDirectory) ref; - ServerColumnFamily.validateDirCol(dirReference.getTabletDir()); ++ ServerColumnFamily.validateDirCol(ref.getMetadataPath()); - String dir = "/" + dirReference.tableId + "/" + dirReference.getTabletDir(); - String dir = "/" + ref.getTableId() + "/" + ref.getMetadataEntry(); ++ String dir = "/" + ref.getTableId() + "/" + ref.getMetadataPath(); - dir = makeRelative(dir, 2); + dir = makeRelative(dir, 2); - GcCandidate gcTemp = candidateMap.remove(dir); - if (gcTemp != null) { - log.debug("Directory Candidate was still in use by dir ref: {}", dir); - // Do not add dir candidates to candidateEntriesToBeDeleted as they are only created once. - } - } else { - String reference = ref.getMetadataEntry(); - if (reference.startsWith("/")) { - log.debug("Candidate {} has a relative path, prepend tableId {}", reference, - ref.getTableId()); - reference = "/" + ref.getTableId() + ref.getMetadataEntry(); - } else if (!reference.contains(":") && !reference.startsWith("../")) { - throw new RuntimeException("Bad file reference " + reference); - } + GcCandidate gcTemp = candidateMap.remove(dir); + if (gcTemp != null) { + log.debug("Directory Candidate was still in use by dir ref: {}", dir); + // Do not add dir candidates to candidateEntriesToBeDeleted as they are only created + // once. + } + } else { + String reference = ref.getMetadataPath(); + if (reference.startsWith("/")) { + log.debug("Candidate {} has a relative path, prepend tableId {}", reference, + ref.getTableId()); + reference = "/" + ref.getTableId() + ref.getMetadataPath(); + } else if (!reference.contains(":") && !reference.startsWith("../")) { + throw new RuntimeException("Bad file reference " + reference); + } - String relativePath = makeRelative(reference, 3); - - // WARNING: This line is EXTREMELY IMPORTANT. - // You MUST REMOVE candidates that are still in use - GcCandidate gcTemp = candidateMap.remove(relativePath); - if (gcTemp != null) { - log.debug("File Candidate was still in use: {}", relativePath); - // Prevent deletion of candidates that are still in use by scans, because they won't be - // recreated once the scan is finished. - if (!ref.isScan()) { - candidateEntriesToBeDeleted.add(gcTemp); + String relativePath = makeRelative(reference, 3); + + // WARNING: This line is EXTREMELY IMPORTANT. + // You MUST REMOVE candidates that are still in use + GcCandidate gcTemp = candidateMap.remove(relativePath); + if (gcTemp != null) { + log.debug("File Candidate was still in use: {}", relativePath); + // Prevent deletion of candidates that are still in use by scans, because they won't be + // recreated once the scan is finished. + if (!ref.isScan()) { + candidateEntriesToBeDeleted.add(gcTemp); + } } - } - String dir = relativePath.substring(0, relativePath.lastIndexOf('/')); - GcCandidate gcT = candidateMap.remove(dir); - if (gcT != null) { - log.debug("Directory Candidate was still in use by file ref: {}", relativePath); - // Do not add dir candidates to candidateEntriesToBeDeleted as they are only created once. + String dir = relativePath.substring(0, relativePath.lastIndexOf('/')); + GcCandidate gcT = candidateMap.remove(dir); + if (gcT != null) { + log.debug("Directory Candidate was still in use by file ref: {}", relativePath); + // Do not add dir candidates to candidateEntriesToBeDeleted as they are only created + // once. + } } - } + }); } Set<TableId> tableIdsAfter = gce.getCandidateTableIDs(); ensureAllTablesChecked(Collections.unmodifiableSet(tableIdsBefore), diff --cc server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java index 4c459f150c,74b6318029..bc21ad5b35 --- a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java +++ b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java @@@ -40,13 -41,13 +40,12 @@@ import org.apache.accumulo.core.client. import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.gc.GcCandidate; import org.apache.accumulo.core.gc.Reference; - import org.apache.accumulo.core.gc.ReferenceDirectory; import org.apache.accumulo.core.gc.ReferenceFile; import org.apache.accumulo.core.manager.state.tables.TableState; -import org.apache.accumulo.core.metadata.MetadataTable; -import org.apache.accumulo.core.metadata.RootTable; +import org.apache.accumulo.core.metadata.AccumuloTable; import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.Ample.GcCandidateType; -import org.apache.accumulo.server.replication.proto.Replication.Status; +import org.apache.hadoop.fs.Path; import org.junit.jupiter.api.Test; public class GarbageCollectionTest {