This is an automated email from the ASF dual-hosted git repository.

domgarguilo pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new b21837a39e Add metrics for scan server reservation write out time and 
collisions (#4577)
b21837a39e is described below

commit b21837a39e51968b8cc87dcf9781fa37b5aefd39
Author: Dom G <domgargu...@apache.org>
AuthorDate: Wed May 22 15:48:29 2024 -0400

    Add metrics for scan server reservation write out time and collisions 
(#4577)
---
 .../accumulo/core/metrics/MetricsProducer.java     |  8 ++++--
 .../org/apache/accumulo/tserver/ScanServer.java    | 11 ++++----
 .../apache/accumulo/tserver/ScanServerMetrics.java | 32 +++++++++++++++++++---
 .../apache/accumulo/test/metrics/MetricsIT.java    |  2 +-
 4 files changed, 41 insertions(+), 12 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java 
b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
index ddc9278b43..dd4489b87c 100644
--- a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
@@ -341,7 +341,7 @@ import io.micrometer.core.instrument.MeterRegistry;
  * <tr>
  * <th>N/A</th>
  * <th>N/A</th>
- * <th>{@value #METRICS_SCAN_RESERVATION_TIMER}</th>
+ * <th>{@value #METRICS_SCAN_RESERVATION_TOTAL_TIMER}</th>
  * <th>Timer</th>
  * <th>Time to reserve a tablets files for scan</th>
  * </tr>
@@ -629,8 +629,12 @@ public interface MetricsProducer {
   String METRICS_SCAN_START = METRICS_SCAN_PREFIX + "start";
   String METRICS_SCAN_CONTINUE = METRICS_SCAN_PREFIX + "continue";
   String METRICS_SCAN_CLOSE = METRICS_SCAN_PREFIX + "close";
+  String METRICS_SCAN_RESERVATION_TOTAL_TIMER = METRICS_SCAN_PREFIX + 
"reservation.total.timer";
+  String METRICS_SCAN_RESERVATION_WRITEOUT_TIMER =
+      METRICS_SCAN_PREFIX + "reservation.writeout.timer";
   String METRICS_SCAN_BUSY_TIMEOUT_COUNTER = METRICS_SCAN_PREFIX + 
"busy.timeout.count";
-  String METRICS_SCAN_RESERVATION_TIMER = METRICS_SCAN_PREFIX + 
"reservation.timer";
+  String METRICS_SCAN_RESERVATION_CONFLICT_COUNTER =
+      METRICS_SCAN_PREFIX + "reservation.conflict.count";
   String METRICS_SCAN_QUERIES = METRICS_SCAN_PREFIX + "queries";
   String METRICS_SCAN_QUERY_SCAN_RESULTS = METRICS_SCAN_PREFIX + 
"query.results";
   String METRICS_SCAN_QUERY_SCAN_RESULTS_BYTES = METRICS_SCAN_PREFIX + 
"query.results.bytes";
diff --git 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
index 8d519c53f4..2ddb76e2cb 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.net.UnknownHostException;
 import java.nio.ByteBuffer;
+import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -601,7 +602,8 @@ public class ScanServer extends AbstractServer
       }
 
       if (!filesToReserve.isEmpty()) {
-        getContext().getAmple().putScanServerFileReferences(refs);
+        scanServerMetrics.recordWriteOutReservationTime(
+            () -> getContext().getAmple().putScanServerFileReferences(refs));
 
         // After we insert the scan server refs we need to check and see if 
the tablet is still
         // using the file. As long as the tablet is still using the files then 
the Accumulo GC
@@ -635,6 +637,7 @@ public class ScanServer extends AbstractServer
           LOG.info("RFFS {} tablet files changed while attempting to reference 
files {}",
               myReservationId, filesToReserve);
           getContext().getAmple().deleteScanServerFileReferences(refs);
+          scanServerMetrics.incrementReservationConflictCount();
           return null;
         }
       }
@@ -669,8 +672,7 @@ public class ScanServer extends AbstractServer
     try {
       return reserveFiles(extents);
     } finally {
-      scanServerMetrics.getReservationTimer().record(System.nanoTime() - start,
-          TimeUnit.NANOSECONDS);
+      
scanServerMetrics.recordTotalReservationTime(Duration.ofNanos(System.nanoTime() 
- start));
     }
 
   }
@@ -711,8 +713,7 @@ public class ScanServer extends AbstractServer
     try {
       return reserveFiles(scanId);
     } finally {
-      scanServerMetrics.getReservationTimer().record(System.nanoTime() - start,
-          TimeUnit.NANOSECONDS);
+      
scanServerMetrics.recordTotalReservationTime(Duration.ofNanos(System.nanoTime() 
- start));
     }
   }
 
diff --git 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
index 771def8e4f..365c26ceee 100644
--- 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
+++ 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
@@ -18,6 +18,9 @@
  */
 package org.apache.accumulo.tserver;
 
+import java.time.Duration;
+import java.util.concurrent.atomic.AtomicLong;
+
 import org.apache.accumulo.core.dataImpl.KeyExtent;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata;
 import org.apache.accumulo.core.metrics.MetricsProducer;
@@ -26,14 +29,17 @@ import com.github.benmanes.caffeine.cache.LoadingCache;
 import com.google.common.base.Preconditions;
 
 import io.micrometer.core.instrument.Counter;
+import io.micrometer.core.instrument.FunctionCounter;
 import io.micrometer.core.instrument.MeterRegistry;
 import io.micrometer.core.instrument.Timer;
 import io.micrometer.core.instrument.binder.cache.CaffeineCacheMetrics;
 
 public class ScanServerMetrics implements MetricsProducer {
 
-  private Timer reservationTimer;
+  private Timer totalReservationTimer;
+  private Timer writeOutReservationTimer;
   private Counter busyTimeoutCount;
+  private final AtomicLong reservationConflictCount = new AtomicLong(0);
 
   private final LoadingCache<KeyExtent,TabletMetadata> tabletMetadataCache;
 
@@ -43,20 +49,38 @@ public class ScanServerMetrics implements MetricsProducer {
 
   @Override
   public void registerMetrics(MeterRegistry registry) {
-    reservationTimer = 
Timer.builder(MetricsProducer.METRICS_SCAN_RESERVATION_TIMER)
+    totalReservationTimer = 
Timer.builder(MetricsProducer.METRICS_SCAN_RESERVATION_TOTAL_TIMER)
         .description("Time to reserve a tablets files for 
scan").register(registry);
+    writeOutReservationTimer = Timer
+        .builder(MetricsProducer.METRICS_SCAN_RESERVATION_WRITEOUT_TIMER)
+        .description("Time to write out a tablets file reservations for 
scan").register(registry);
     busyTimeoutCount = Counter.builder(METRICS_SCAN_BUSY_TIMEOUT_COUNTER)
         .description("The number of scans where a busy timeout 
happened").register(registry);
+    FunctionCounter
+        .builder(METRICS_SCAN_RESERVATION_CONFLICT_COUNTER, 
reservationConflictCount,
+            AtomicLong::get)
+        .description(
+            "Counts instances where file reservation attempts for scans 
encountered conflicts")
+        .register(registry);
+
     Preconditions.checkState(tabletMetadataCache.policy().isRecordingStats(),
         "Attempted to instrument cache that is not recording stats.");
     CaffeineCacheMetrics.monitor(registry, tabletMetadataCache, 
METRICS_SCAN_TABLET_METADATA_CACHE);
   }
 
-  public Timer getReservationTimer() {
-    return reservationTimer;
+  public void recordTotalReservationTime(Duration time) {
+    totalReservationTimer.record(time);
+  }
+
+  public void recordWriteOutReservationTime(Runnable time) {
+    writeOutReservationTimer.record(time);
   }
 
   public void incrementBusy() {
     busyTimeoutCount.increment();
   }
+
+  public void incrementReservationConflictCount() {
+    reservationConflictCount.getAndIncrement();
+  }
 }
diff --git a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java 
b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
index 55622d0793..3bb3353529 100644
--- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
@@ -103,7 +103,7 @@ public class MetricsIT extends ConfigurableMacBase 
implements MetricsProducer {
         METRICS_REPLICATION_QUEUE, METRICS_COMPACTOR_MAJC_STUCK, 
METRICS_SCAN_BUSY_TIMEOUT_COUNTER);
     // add sserver as flaky until scan server included in mini tests.
     Set<String> flakyMetrics = Set.of(METRICS_GC_WAL_ERRORS, 
METRICS_FATE_TYPE_IN_PROGRESS,
-        METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_RESERVATION_TIMER,
+        METRICS_SCAN_BUSY_TIMEOUT_COUNTER, 
METRICS_SCAN_RESERVATION_TOTAL_TIMER,
         METRICS_SCAN_TABLET_METADATA_CACHE);
 
     Map<String,String> expectedMetricNames = this.getMetricFields();

Reply via email to