This is an automated email from the ASF dual-hosted git repository.

domgarguilo pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new b21837a39e Add metrics for scan server reservation write out time and collisions (#4577)
b21837a39e is described below

commit b21837a39e51968b8cc87dcf9781fa37b5aefd39
Author: Dom G <domgargu...@apache.org>
AuthorDate: Wed May 22 15:48:29 2024 -0400

    Add metrics for scan server reservation write out time and collisions (#4577)
---
 .../accumulo/core/metrics/MetricsProducer.java     |  8 ++++--
 .../org/apache/accumulo/tserver/ScanServer.java    | 11 ++++----
 .../apache/accumulo/tserver/ScanServerMetrics.java | 32 +++++++++++++++++++---
 .../apache/accumulo/test/metrics/MetricsIT.java    |  2 +-
 4 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
index ddc9278b43..dd4489b87c 100644
--- a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
@@ -341,7 +341,7 @@ import io.micrometer.core.instrument.MeterRegistry;
  * <tr>
  * <th>N/A</th>
  * <th>N/A</th>
- * <th>{@value #METRICS_SCAN_RESERVATION_TIMER}</th>
+ * <th>{@value #METRICS_SCAN_RESERVATION_TOTAL_TIMER}</th>
  * <th>Timer</th>
  * <th>Time to reserve a tablets files for scan</th>
  * </tr>
@@ -629,8 +629,12 @@ public interface MetricsProducer {
   String METRICS_SCAN_START = METRICS_SCAN_PREFIX + "start";
   String METRICS_SCAN_CONTINUE = METRICS_SCAN_PREFIX + "continue";
   String METRICS_SCAN_CLOSE = METRICS_SCAN_PREFIX + "close";
+  String METRICS_SCAN_RESERVATION_TOTAL_TIMER = METRICS_SCAN_PREFIX + "reservation.total.timer";
+  String METRICS_SCAN_RESERVATION_WRITEOUT_TIMER =
+      METRICS_SCAN_PREFIX + "reservation.writeout.timer";
   String METRICS_SCAN_BUSY_TIMEOUT_COUNTER = METRICS_SCAN_PREFIX + "busy.timeout.count";
-  String METRICS_SCAN_RESERVATION_TIMER = METRICS_SCAN_PREFIX + "reservation.timer";
+  String METRICS_SCAN_RESERVATION_CONFLICT_COUNTER =
+      METRICS_SCAN_PREFIX + "reservation.conflict.count";
   String METRICS_SCAN_QUERIES = METRICS_SCAN_PREFIX + "queries";
   String METRICS_SCAN_QUERY_SCAN_RESULTS = METRICS_SCAN_PREFIX + "query.results";
   String METRICS_SCAN_QUERY_SCAN_RESULTS_BYTES = METRICS_SCAN_PREFIX + "query.results.bytes";
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
index 8d519c53f4..2ddb76e2cb 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
@@ -25,6 +25,7 @@ import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.net.UnknownHostException;
 import java.nio.ByteBuffer;
+import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -601,7 +602,8 @@ public class ScanServer extends AbstractServer
       }
 
       if (!filesToReserve.isEmpty()) {
-        getContext().getAmple().putScanServerFileReferences(refs);
+        scanServerMetrics.recordWriteOutReservationTime(
+            () -> getContext().getAmple().putScanServerFileReferences(refs));
 
         // After we insert the scan server refs we need to check and see if the tablet is still
         // using the file. As long as the tablet is still using the files then the Accumulo GC
@@ -635,6 +637,7 @@ public class ScanServer extends AbstractServer
             LOG.info("RFFS {} tablet files changed while attempting to reference files {}",
                 myReservationId, filesToReserve);
             getContext().getAmple().deleteScanServerFileReferences(refs);
+            scanServerMetrics.incrementReservationConflictCount();
             return null;
           }
         }
@@ -669,8 +672,7 @@ public class ScanServer extends AbstractServer
     try {
       return reserveFiles(extents);
     } finally {
-      scanServerMetrics.getReservationTimer().record(System.nanoTime() - start,
-          TimeUnit.NANOSECONDS);
+      scanServerMetrics.recordTotalReservationTime(Duration.ofNanos(System.nanoTime() - start));
     }
   }
 
@@ -711,8 +713,7 @@ public class ScanServer extends AbstractServer
     try {
       return reserveFiles(scanId);
     } finally {
-      scanServerMetrics.getReservationTimer().record(System.nanoTime() - start,
-          TimeUnit.NANOSECONDS);
+      scanServerMetrics.recordTotalReservationTime(Duration.ofNanos(System.nanoTime() - start));
     }
   }
 
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
index 771def8e4f..365c26ceee 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServerMetrics.java
@@ -18,6 +18,9 @@
  */
 package org.apache.accumulo.tserver;
 
+import java.time.Duration;
+import java.util.concurrent.atomic.AtomicLong;
+
 import org.apache.accumulo.core.dataImpl.KeyExtent;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata;
 import org.apache.accumulo.core.metrics.MetricsProducer;
@@ -26,14 +29,17 @@ import com.github.benmanes.caffeine.cache.LoadingCache;
 import com.google.common.base.Preconditions;
 
 import io.micrometer.core.instrument.Counter;
+import io.micrometer.core.instrument.FunctionCounter;
 import io.micrometer.core.instrument.MeterRegistry;
 import io.micrometer.core.instrument.Timer;
 import io.micrometer.core.instrument.binder.cache.CaffeineCacheMetrics;
 
 public class ScanServerMetrics implements MetricsProducer {
 
-  private Timer reservationTimer;
+  private Timer totalReservationTimer;
+  private Timer writeOutReservationTimer;
   private Counter busyTimeoutCount;
+  private final AtomicLong reservationConflictCount = new AtomicLong(0);
 
   private final LoadingCache<KeyExtent,TabletMetadata> tabletMetadataCache;
 
@@ -43,20 +49,38 @@ public class ScanServerMetrics implements MetricsProducer {
 
   @Override
   public void registerMetrics(MeterRegistry registry) {
-    reservationTimer = Timer.builder(MetricsProducer.METRICS_SCAN_RESERVATION_TIMER)
+    totalReservationTimer = Timer.builder(MetricsProducer.METRICS_SCAN_RESERVATION_TOTAL_TIMER)
         .description("Time to reserve a tablets files for scan").register(registry);
+    writeOutReservationTimer = Timer
+        .builder(MetricsProducer.METRICS_SCAN_RESERVATION_WRITEOUT_TIMER)
+        .description("Time to write out a tablets file reservations for scan").register(registry);
     busyTimeoutCount = Counter.builder(METRICS_SCAN_BUSY_TIMEOUT_COUNTER)
         .description("The number of scans where a busy timeout happened").register(registry);
+    FunctionCounter
+        .builder(METRICS_SCAN_RESERVATION_CONFLICT_COUNTER, reservationConflictCount,
+            AtomicLong::get)
+        .description(
+            "Counts instances where file reservation attempts for scans encountered conflicts")
+        .register(registry);
+
     Preconditions.checkState(tabletMetadataCache.policy().isRecordingStats(),
         "Attempted to instrument cache that is not recording stats.");
     CaffeineCacheMetrics.monitor(registry, tabletMetadataCache, METRICS_SCAN_TABLET_METADATA_CACHE);
   }
 
-  public Timer getReservationTimer() {
-    return reservationTimer;
+  public void recordTotalReservationTime(Duration time) {
+    totalReservationTimer.record(time);
+  }
+
+  public void recordWriteOutReservationTime(Runnable time) {
+    writeOutReservationTimer.record(time);
   }
 
   public void incrementBusy() {
     busyTimeoutCount.increment();
   }
+
+  public void incrementReservationConflictCount() {
+    reservationConflictCount.getAndIncrement();
+  }
 }
diff --git a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
index 55622d0793..3bb3353529 100644
--- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
@@ -103,7 +103,7 @@ public class MetricsIT extends ConfigurableMacBase implements MetricsProducer {
         METRICS_REPLICATION_QUEUE, METRICS_COMPACTOR_MAJC_STUCK, METRICS_SCAN_BUSY_TIMEOUT_COUNTER);
     // add sserver as flaky until scan server included in mini tests.
     Set<String> flakyMetrics = Set.of(METRICS_GC_WAL_ERRORS, METRICS_FATE_TYPE_IN_PROGRESS,
-        METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_RESERVATION_TIMER,
+        METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_RESERVATION_TOTAL_TIMER,
         METRICS_SCAN_TABLET_METADATA_CACHE);
 
     Map<String,String> expectedMetricNames = this.getMetricFields();