This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push: new 7147177385 corrects open file metric (#5009) 7147177385 is described below commit 7147177385ef04961250b63cf5a709c41bd94287 Author: Keith Turner <ktur...@apache.org> AuthorDate: Wed Oct 23 17:18:43 2024 -0400 corrects open file metric (#5009) The metrics for computing the current number of open files on a tablet server was incorrect over time. Used the semaphor limiting open files to compute the number of open files. The removes the need to correctly increment and decrement files opened which had a bug. --- .../org/apache/accumulo/server/fs/FileManager.java | 4 ++++ .../java/org/apache/accumulo/tserver/ScanServer.java | 2 +- .../java/org/apache/accumulo/tserver/TabletServer.java | 2 +- .../accumulo/tserver/TabletServerResourceManager.java | 4 ++++ .../tserver/metrics/TabletServerScanMetrics.java | 18 +++++++----------- .../apache/accumulo/tserver/tablet/ScanDataSource.java | 3 --- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/fs/FileManager.java b/server/base/src/main/java/org/apache/accumulo/server/fs/FileManager.java index 277f0c6240..adceeeb1c0 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/fs/FileManager.java +++ b/server/base/src/main/java/org/apache/accumulo/server/fs/FileManager.java @@ -593,4 +593,8 @@ public class FileManager { public ScanFileManager newScanFileManager(KeyExtent tablet, CacheProvider cacheProvider) { return new ScanFileManager(tablet, cacheProvider); } + + public int getOpenFiles() { + return maxOpen - filePermits.availablePermits(); + } } diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java index 688a08b171..84cc046fbe 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java @@ -407,7 +407,7 @@ public class ScanServer extends AbstractServer metricsInfo.addServiceTags(getApplicationName(), clientAddress); metricsInfo.addCommonTags(List.of(Tag.of("resource.group", groupName))); - scanMetrics = new TabletServerScanMetrics(); + scanMetrics = new TabletServerScanMetrics(resourceManager::getOpenFiles); sessionManager.setZombieCountConsumer(scanMetrics::setZombieScanThreads); scanServerMetrics = new ScanServerMetrics(tabletMetadataCache); blockCacheMetrics = new BlockCacheMetrics(resourceManager.getIndexCache(), diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java index 29d392f1cc..24573dfd1f 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServer.java @@ -765,7 +765,7 @@ public class TabletServer extends AbstractServer implements TabletHostingServer metrics = new TabletServerMetrics(this); updateMetrics = new TabletServerUpdateMetrics(); - scanMetrics = new TabletServerScanMetrics(); + scanMetrics = new TabletServerScanMetrics(this.resourceManager::getOpenFiles); sessionManager.setZombieCountConsumer(scanMetrics::setZombieScanThreads); mincMetrics = new TabletServerMinCMetrics(); ceMetrics = new CompactionExecutorsMetrics(); diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServerResourceManager.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServerResourceManager.java index cdcebd2576..a171d7d33c 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServerResourceManager.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/TabletServerResourceManager.java @@ -400,6 +400,10 @@ public class TabletServerResourceManager { new AssignmentWatcher(acuConf, context, activeAssignments), 5000, TimeUnit.MILLISECONDS)); } + public int getOpenFiles() { + return fileManager.getOpenFiles(); + } + /** * Accepts some map which is tracking active assignment task(s) (running) and monitors them to * ensure that the time the assignment(s) have been running don't exceed a threshold. If the time diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java index a46a8bdeab..36fbd42de1 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java @@ -19,9 +19,9 @@ package org.apache.accumulo.tserver.metrics; import java.time.Duration; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.LongAdder; +import java.util.function.IntSupplier; import org.apache.accumulo.core.metrics.MetricsProducer; import org.apache.accumulo.server.metrics.NoopMetrics; @@ -34,7 +34,7 @@ import io.micrometer.core.instrument.Timer; public class TabletServerScanMetrics implements MetricsProducer { - private final AtomicInteger openFiles = new AtomicInteger(0); + private final IntSupplier openFiles; private Timer scans = NoopMetrics.useNoopTimer(); private DistributionSummary resultsPerScan = NoopMetrics.useNoopDistributionSummary(); private DistributionSummary yields = NoopMetrics.useNoopDistributionSummary(); @@ -96,14 +96,6 @@ public class TabletServerScanMetrics implements MetricsProducer { yields.record(value); } - public void incrementOpenFiles(int delta) { - openFiles.addAndGet(Math.max(0, delta)); - } - - public void decrementOpenFiles(int delta) { - openFiles.addAndGet(delta < 0 ? delta : delta * -1); - } - public void incrementStartScan(double value) { startScanCalls.increment(value); } @@ -128,9 +120,13 @@ public class TabletServerScanMetrics implements MetricsProducer { return zombieScanThreads.get(); } + public TabletServerScanMetrics(IntSupplier openFileSupplier) { + openFiles = openFileSupplier; + } + @Override public void registerMetrics(MeterRegistry registry) { - Gauge.builder(METRICS_SCAN_OPEN_FILES, openFiles::get) + Gauge.builder(METRICS_SCAN_OPEN_FILES, openFiles::getAsInt) .description("Number of files open for scans").register(registry); scans = Timer.builder(METRICS_SCAN_TIMES).description("Scans").register(registry); resultsPerScan = DistributionSummary.builder(METRICS_SCAN_RESULTS) diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java index 8b7aca78f7..77b0de4b08 100644 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/ScanDataSource.java @@ -100,7 +100,6 @@ class ScanDataSource implements DataSource { } finally { try { if (fileManager != null) { - tablet.getScanMetrics().decrementOpenFiles(fileManager.getNumOpenFiles()); fileManager.releaseOpenFiles(false); } } catch (Exception e) { @@ -154,7 +153,6 @@ class ScanDataSource implements DataSource { // only acquire the file manager when we know the tablet is open if (fileManager == null) { fileManager = tablet.getTabletResources().newScanFileManager(scanParams.getScanDispatch()); - tablet.getScanMetrics().incrementOpenFiles(fileManager.getNumOpenFiles()); log.trace("Adding active scan for {}, scanId:{}", tablet.getExtent(), scanDataSourceId); tablet.addActiveScans(this); } @@ -274,7 +272,6 @@ class ScanDataSource implements DataSource { } try { if (fileManager != null) { - tablet.getScanMetrics().decrementOpenFiles(fileManager.getNumOpenFiles()); fileManager.releaseOpenFiles(sawErrors); } } finally {