This is an automated email from the ASF dual-hosted git repository.

ddanielr pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new 2950699947 Adds batch information to GC logging (#5391)
2950699947 is described below

commit 29506999476e99ffc15019469c35705ced0d3278
Author: Daniel Roberts <ddani...@gmail.com>
AuthorDate: Tue Mar 11 21:06:08 2025 -0400

    Adds batch information to GC logging (#5391)
    
    Adds batch counter to log messages.
    Adds in-progress counters to long-running methods.
    
    Uses getCompletedTaskCount which
    should list the amount of completed tasks in the threadpool.
---
 .../main/java/org/apache/accumulo/gc/GCRun.java    | 40 +++++++++++++++++-----
 .../accumulo/gc/GarbageCollectionAlgorithm.java    |  9 +++--
 .../accumulo/gc/SimpleGarbageCollectorTest.java    |  3 +-
 3 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java 
b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
index a4b328438f..22275c9e11 100644
--- a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
+++ b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
@@ -25,6 +25,7 @@ import static 
org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType
 
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -36,8 +37,9 @@ import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
 import java.util.SortedMap;
-import java.util.concurrent.ExecutorService;
+import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Stream;
 
 import org.apache.accumulo.core.Constants;
@@ -65,6 +67,7 @@ import 
org.apache.accumulo.core.metadata.schema.TabletMetadata;
 import org.apache.accumulo.core.metadata.schema.TabletsMetadata;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.Pair;
+import org.apache.accumulo.core.util.Timer;
 import org.apache.accumulo.core.util.UtilWaitThread;
 import org.apache.accumulo.core.util.threads.ThreadPools;
 import org.apache.accumulo.core.volume.Volume;
@@ -95,16 +98,19 @@ public class GCRun implements GarbageCollectionEnvironment {
   private final Ample.DataLevel level;
   private final ServerContext context;
   private final AccumuloConfiguration config;
+  private final Duration loggingInterval = Duration.ofMinutes(1);
   private long candidates = 0;
   private long inUse = 0;
   private long deleted = 0;
   private long errors = 0;
+  private AtomicInteger batchCount;
 
   public GCRun(Ample.DataLevel level, ServerContext context) {
     this.log = LoggerFactory.getLogger(GCRun.class.getName() + "." + 
level.name());
     this.level = level;
     this.context = context;
     this.config = context.getConfiguration();
+    this.batchCount = new AtomicInteger(0);
   }
 
   @Override
@@ -132,7 +138,8 @@ public class GCRun implements GarbageCollectionEnvironment {
       return;
     }
 
-    log.info("Attempting to delete gcCandidates of type {} from metadata", 
type);
+    log.info("Batch {} attempting to delete {} gcCandidates of type {} from 
metadata",
+        batchCount.get(), gcCandidates.size(), type);
     context.getAmple().deleteGcCandidates(level, gcCandidates, type);
   }
 
@@ -143,6 +150,7 @@ public class GCRun implements GarbageCollectionEnvironment {
     long candidateBatchSize = getCandidateBatchSize() / 2;
 
     List<GcCandidate> candidatesBatch = new ArrayList<>();
+    batchCount.incrementAndGet();
 
     while (candidates.hasNext()) {
       GcCandidate candidate = candidates.next();
@@ -288,15 +296,17 @@ public class GCRun implements 
GarbageCollectionEnvironment {
 
     List<GcCandidate> processedDeletes = Collections.synchronizedList(new 
ArrayList<>());
 
-    minimizeDeletes(confirmedDeletes, processedDeletes, fs, log);
+    minimizeDeletes(confirmedDeletes, processedDeletes, fs, log, 
loggingInterval);
 
-    ExecutorService deleteThreadPool = ThreadPools.getServerThreadPools()
+    ThreadPoolExecutor deleteThreadPool = ThreadPools.getServerThreadPools()
         .createExecutorService(config, Property.GC_DELETE_THREADS);
 
     final List<Pair<Path,Path>> replacements = context.getVolumeReplacements();
 
+    log.info("Batch {} attempting to delete {} gcCandidate files", 
batchCount.get(),
+        confirmedDeletes.size());
+    Timer timer = Timer.startNew();
     for (final GcCandidate delete : confirmedDeletes.values()) {
-
       Runnable deleteTask = () -> {
         boolean removeFlag = false;
 
@@ -320,7 +330,7 @@ public class GCRun implements GarbageCollectionEnvironment {
           }
 
           for (Path pathToDel : GcVolumeUtil.expandAllVolumesUri(fs, 
fullPath)) {
-            log.debug("{} Deleting {}", fileActionPrefix, pathToDel);
+            log.debug("Batch {} {} Deleting {}", batchCount.get(), 
fileActionPrefix, pathToDel);
 
             if (moveToTrash(pathToDel) || fs.deleteRecursively(pathToDel)) {
               // delete succeeded, still want to delete
@@ -373,7 +383,12 @@ public class GCRun implements GarbageCollectionEnvironment 
{
     deleteThreadPool.shutdown();
 
     try {
-      while (!deleteThreadPool.awaitTermination(1000, TimeUnit.MILLISECONDS)) 
{ // empty
+      while (!deleteThreadPool.awaitTermination(1000, TimeUnit.MILLISECONDS)) {
+        if (timer.hasElapsed(loggingInterval)) {
+          log.info("Batch {} deleting file {} of {}", batchCount.get(),
+              deleteThreadPool.getCompletedTaskCount(), 
confirmedDeletes.size());
+          timer.restart();
+        }
       }
     } catch (InterruptedException e1) {
       log.error("{}", e1.getMessage(), e1);
@@ -441,7 +456,8 @@ public class GCRun implements GarbageCollectionEnvironment {
 
   @VisibleForTesting
   static void minimizeDeletes(SortedMap<String,GcCandidate> confirmedDeletes,
-      List<GcCandidate> processedDeletes, VolumeManager fs, Logger logger) {
+      List<GcCandidate> processedDeletes, VolumeManager fs, Logger logger,
+      Duration loggingInterval) {
     Set<Path> seenVolumes = new HashSet<>();
 
     // when deleting a dir and all files in that dir, only need to delete the 
dir.
@@ -451,7 +467,11 @@ public class GCRun implements GarbageCollectionEnvironment 
{
 
     String lastDirRel = null;
     Path lastDirAbs = null;
+    Timer progressTimer = Timer.startNew();
+    int progressCount = 0;
+    int totalDeletes = confirmedDeletes.size();
     while (cdIter.hasNext()) {
+      progressCount++;
       Map.Entry<String,GcCandidate> entry = cdIter.next();
       String relPath = entry.getKey();
       Path absPath = new Path(entry.getValue().getPath());
@@ -491,6 +511,10 @@ public class GCRun implements GarbageCollectionEnvironment 
{
           lastDirAbs = null;
         }
       }
+      if (progressTimer.hasElapsed(loggingInterval)) {
+        logger.debug("Minimizing delete {} of {}", progressCount, 
totalDeletes);
+        progressTimer.restart();
+      }
     }
   }
 
diff --git 
a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
 
b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
index 419d10728f..095d9dd490 100644
--- 
a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
+++ 
b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
@@ -387,6 +387,7 @@ public class GarbageCollectionAlgorithm {
 
     Iterator<GcCandidate> candidatesIter = gce.getCandidates();
     long totalBlips = 0;
+    int batchCount = 0;
 
     while (candidatesIter.hasNext()) {
       List<GcCandidate> batchOfCandidates;
@@ -399,7 +400,8 @@ public class GarbageCollectionAlgorithm {
       } finally {
         candidatesSpan.end();
       }
-      totalBlips = deleteBatch(gce, batchOfCandidates);
+      batchCount++;
+      totalBlips = deleteBatch(gce, batchOfCandidates, batchCount);
     }
     return totalBlips;
   }
@@ -407,11 +409,12 @@ public class GarbageCollectionAlgorithm {
   /**
    * Given a sub-list of possible deletion candidates, process and remove 
valid deletion candidates.
    */
-  private long deleteBatch(GarbageCollectionEnvironment gce, List<GcCandidate> 
currentBatch)
-      throws InterruptedException, TableNotFoundException, IOException {
+  private long deleteBatch(GarbageCollectionEnvironment gce, List<GcCandidate> 
currentBatch,
+      int batchCount) throws InterruptedException, TableNotFoundException, 
IOException {
 
     long origSize = currentBatch.size();
     gce.incrementCandidatesStat(origSize);
+    log.info("Batch {} total deletion candidates: {}", batchCount, origSize);
 
     SortedMap<String,GcCandidate> candidateMap = makeRelative(currentBatch);
 
diff --git 
a/server/gc/src/test/java/org/apache/accumulo/gc/SimpleGarbageCollectorTest.java
 
b/server/gc/src/test/java/org/apache/accumulo/gc/SimpleGarbageCollectorTest.java
index fdb66ba146..34cb62cc7b 100644
--- 
a/server/gc/src/test/java/org/apache/accumulo/gc/SimpleGarbageCollectorTest.java
+++ 
b/server/gc/src/test/java/org/apache/accumulo/gc/SimpleGarbageCollectorTest.java
@@ -31,6 +31,7 @@ import static org.junit.jupiter.api.Assertions.assertSame;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.FileNotFoundException;
+import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -192,7 +193,7 @@ public class SimpleGarbageCollectorTest {
 
     List<GcCandidate> processedDeletes = new ArrayList<>();
 
-    GCRun.minimizeDeletes(confirmed, processedDeletes, volMgr2, log);
+    GCRun.minimizeDeletes(confirmed, processedDeletes, volMgr2, log, 
Duration.ofMinutes(1));
 
     TreeMap<String,GcCandidate> expected = new TreeMap<>();
     expected.put("5a/t-0001", new 
GcCandidate("hdfs://nn1/accumulo/tables/5a/t-0001", 0L));

Reply via email to