This is an automated email from the ASF dual-hosted git repository.

edcoleman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit 46ed92329cf83911c5b43f348658941e0979f3f9
Merge: 8da73ce467 96b86a5f62
Author: Ed Coleman <edcole...@apache.org>
AuthorDate: Fri May 3 16:57:08 2024 +0000

    Merge remote-tracking branch 'upstream/2.1'
    
    includes:
      - 96b86a5f62 - update for additional scan server metrics
      - c488f788ad - adds scan server metrics

 .../accumulo/core/metrics/MetricsProducer.java     | 108 +++++++++++----------
 .../org/apache/accumulo/tserver/ScanServer.java    |  55 +++++++++--
 .../apache/accumulo/tserver/ScanServerMetrics.java |  59 +++++++++++
 .../accumulo/tserver/ThriftScanClientHandler.java  |   4 +-
 .../tserver/metrics/TabletServerScanMetrics.java   |  24 +++--
 .../apache/accumulo/tserver/ScanServerTest.java    |   9 ++
 .../apache/accumulo/test/metrics/MetricsIT.java    |  11 ++-
 7 files changed, 192 insertions(+), 78 deletions(-)

diff --cc core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
index 5e43b2a938,1bb2a1c10e..2fdf9172ab
--- a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
@@@ -599,9 -613,14 +600,9 @@@ public interface MetricsProducer 
    String METRICS_MINC_PREFIX = "accumulo.tserver.compactions.minc.";
    String METRICS_MINC_QUEUED = METRICS_MINC_PREFIX + "queued";
    String METRICS_MINC_RUNNING = METRICS_MINC_PREFIX + "running";
 -
 -  String METRICS_REPLICATION_PREFIX = "accumulo.replication.";
 -  String METRICS_REPLICATION_QUEUE = METRICS_REPLICATION_PREFIX + "queue";
 -  String METRICS_REPLICATION_PENDING_FILES = METRICS_REPLICATION_PREFIX + "files.pending";
 -  String METRICS_REPLICATION_PEERS = METRICS_REPLICATION_PREFIX + "peers";
 -  String METRICS_REPLICATION_THREADS = METRICS_REPLICATION_PREFIX + "threads";
 +  String METRICS_MINC_PAUSED = METRICS_MINC_PREFIX + "paused";
  
-   String METRICS_SCAN_PREFIX = "accumulo.tserver.scans.";
+   String METRICS_SCAN_PREFIX = "accumulo.scan.";
    String METRICS_SCAN_TIMES = METRICS_SCAN_PREFIX + "times";
    String METRICS_SCAN_OPEN_FILES = METRICS_SCAN_PREFIX + "files.open";
    String METRICS_SCAN_RESULTS = METRICS_SCAN_PREFIX + "result";
@@@ -609,10 -628,15 +610,17 @@@
    String METRICS_SCAN_START = METRICS_SCAN_PREFIX + "start";
    String METRICS_SCAN_CONTINUE = METRICS_SCAN_PREFIX + "continue";
    String METRICS_SCAN_CLOSE = METRICS_SCAN_PREFIX + "close";
-   String METRICS_SCAN_BUSY_TIMEOUT = METRICS_SCAN_PREFIX + "busy.timeout";
+   String METRICS_SCAN_BUSY_TIMEOUT_COUNTER = METRICS_SCAN_PREFIX + "busy.timeout.count";
+   String METRICS_SCAN_RESERVATION_TIMER = METRICS_SCAN_PREFIX + "reservation.timer";
+   String METRICS_SCAN_QUERIES = METRICS_SCAN_PREFIX + "queries";
+   String METRICS_SCAN_QUERY_SCAN_RESULTS = METRICS_SCAN_PREFIX + "query.results";
+   String METRICS_SCAN_QUERY_SCAN_RESULTS_BYTES = METRICS_SCAN_PREFIX + "query.results.bytes";
+   String METRICS_SCAN_SCANNED_ENTRIES = METRICS_SCAN_PREFIX + "query.scanned.entries";
 +  String METRICS_SCAN_PAUSED_FOR_MEM = METRICS_SCAN_PREFIX + "paused.for.memory";
 +  String METRICS_SCAN_RETURN_FOR_MEM = METRICS_SCAN_PREFIX + "return.early.for.memory";
  
+   String METRICS_SCAN_TABLET_METADATA_CACHE = METRICS_SCAN_PREFIX + "tablet.metadata.cache";
+ 
    String METRICS_TSERVER_PREFIX = "accumulo.tserver.";
    String METRICS_TSERVER_ENTRIES = METRICS_TSERVER_PREFIX + "entries";
    String METRICS_TSERVER_MEM_ENTRIES = METRICS_TSERVER_PREFIX + "entries.mem";
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
index ca4b28d06e,1e237e80e8..44ab680a98
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/ScanServer.java
@@@ -76,17 -70,20 +76,18 @@@ import org.apache.accumulo.core.metadat
  import org.apache.accumulo.core.metadata.StoredTabletFile;
  import org.apache.accumulo.core.metadata.schema.Ample;
  import org.apache.accumulo.core.metadata.schema.TabletMetadata;
 +import org.apache.accumulo.core.metadata.schema.TabletsMetadata;
  import org.apache.accumulo.core.metrics.MetricsInfo;
  import org.apache.accumulo.core.securityImpl.thrift.TCredentials;
 -import org.apache.accumulo.core.spi.scan.ScanServerSelector;
 -import org.apache.accumulo.core.tabletserver.thrift.ActiveScan;
 +import org.apache.accumulo.core.tabletscan.thrift.ActiveScan;
++import org.apache.accumulo.core.tabletscan.thrift.ScanServerBusyException;
 +import org.apache.accumulo.core.tabletscan.thrift.TSampleNotPresentException;
 +import org.apache.accumulo.core.tabletscan.thrift.TSamplerConfiguration;
 +import org.apache.accumulo.core.tabletscan.thrift.TabletScanClientService;
 +import org.apache.accumulo.core.tabletscan.thrift.TooManyFilesException;
  import org.apache.accumulo.core.tabletserver.thrift.NoSuchScanIDException;
  import org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException;
 -import org.apache.accumulo.core.tabletserver.thrift.ScanServerBusyException;
 -import org.apache.accumulo.core.tabletserver.thrift.TSampleNotPresentException;
 -import org.apache.accumulo.core.tabletserver.thrift.TSamplerConfiguration;
 -import org.apache.accumulo.core.tabletserver.thrift.TabletScanClientService;
 -import org.apache.accumulo.core.tabletserver.thrift.TooManyFilesException;
 -import org.apache.accumulo.core.trace.thrift.TInfo;
  import org.apache.accumulo.core.util.Halt;
 -import org.apache.accumulo.core.util.HostAndPort;
  import org.apache.accumulo.core.util.UtilWaitThread;
  import org.apache.accumulo.core.util.threads.ThreadPools;
  import org.apache.accumulo.server.AbstractServer;
@@@ -125,8 -122,9 +126,10 @@@ import com.github.benmanes.caffeine.cac
  import com.google.common.annotations.VisibleForTesting;
  import com.google.common.base.Preconditions;
  import com.google.common.collect.Sets;
 +import com.google.common.net.HostAndPort;
  
+ import io.micrometer.core.instrument.Tag;
+ 
  public class ScanServer extends AbstractServer
      implements TabletScanClientService.Iface, TabletHostingServer {
  
@@@ -375,10 -376,12 +379,12 @@@
  
      MetricsInfo metricsInfo = getContext().getMetricsInfo();
      metricsInfo.addServiceTags(getApplicationName(), clientAddress);
+     metricsInfo.addCommonTags(List.of(Tag.of("resource.group", groupName)));
  
      scanMetrics = new TabletServerScanMetrics();
+     scanServerMetrics = new ScanServerMetrics(tabletMetadataCache);
  
-     metricsInfo.addMetricsProducers(this, scanMetrics);
 -    metricsInfo.addMetricsProducers(scanMetrics, scanServerMetrics);
++    metricsInfo.addMetricsProducers(this, scanMetrics, scanServerMetrics);
      metricsInfo.init();
      // We need to set the compaction manager so that we don't get an NPE in CompactableImpl.close
  
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java
index 9a1faa6261,8e066dd7f7..09f8431bb4
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerScanMetrics.java
@@@ -39,9 -39,7 +39,9 @@@ public class TabletServerScanMetrics im
    private Counter startScanCalls;
    private Counter continueScanCalls;
    private Counter closeScanCalls;
-   private Counter busyTimeoutReturned;
+   private Counter busyTimeoutCount;
 +  private Counter pausedForMemory;
 +  private Counter earlyReturnForMemory;
  
    private final LongAdder lookupCount = new LongAdder();
    private final LongAdder queryResultCount = new LongAdder();
@@@ -72,10 -70,10 +72,6 @@@
      return this.queryResultBytes.sum();
    }
  
--  public void incrementScannedCount(long amount) {
--    this.scannedCount.add(amount);
--  }
--
    public LongAdder getScannedCounter() {
      return this.scannedCount;
    }
@@@ -116,18 -114,10 +112,18 @@@
      closeScanCalls.increment(value);
    }
  
-   public void incrementScanBusyTimeout(double value) {
-     busyTimeoutReturned.increment(value);
+   public void incrementBusy(double value) {
+     busyTimeoutCount.increment(value);
    }
  
 +  public void incrementScanPausedForLowMemory() {
 +    pausedForMemory.increment();
 +  }
 +
 +  public void incrementEarlyReturnForLowMemory() {
 +    earlyReturnForMemory.increment();
 +  }
 +
    @Override
    public void registerMetrics(MeterRegistry registry) {
      Gauge.builder(METRICS_SCAN_OPEN_FILES, openFiles::get)
@@@ -143,23 -133,20 +139,25 @@@
          .description("calls to continue a scan / multiscan").register(registry);
      closeScanCalls = Counter.builder(METRICS_SCAN_CLOSE)
          .description("calls to close a scan / multiscan").register(registry);
-     busyTimeoutReturned = Counter.builder(METRICS_SCAN_BUSY_TIMEOUT)
-         .description("times that a scan has timed out in the queue").register(registry);
-     Gauge.builder(METRICS_TSERVER_QUERIES, this, TabletServerScanMetrics::getLookupCount)
+     busyTimeoutCount = Counter.builder(METRICS_SCAN_BUSY_TIMEOUT_COUNTER)
+         .description("The number of scans where a busy timeout happened").register(registry);
+     Gauge.builder(METRICS_SCAN_QUERIES, this, TabletServerScanMetrics::getLookupCount)
          .description("Number of queries").register(registry);
-     Gauge.builder(METRICS_TSERVER_SCAN_RESULTS, this, TabletServerScanMetrics::getQueryResultCount)
+     Gauge
+         .builder(METRICS_SCAN_QUERY_SCAN_RESULTS, this,
+             TabletServerScanMetrics::getQueryResultCount)
          .description("Query rate (entries/sec)").register(registry);
      Gauge
-         .builder(METRICS_TSERVER_SCAN_RESULTS_BYTES, this,
+         .builder(METRICS_SCAN_QUERY_SCAN_RESULTS_BYTES, this,
              TabletServerScanMetrics::getQueryByteCount)
          .description("Query rate (bytes/sec)").register(registry);
-     Gauge.builder(METRICS_TSERVER_SCANNED_ENTRIES, this, TabletServerScanMetrics::getScannedCount)
+     Gauge.builder(METRICS_SCAN_SCANNED_ENTRIES, this, TabletServerScanMetrics::getScannedCount)
          .description("Scanned rate").register(registry);
 +    pausedForMemory = Counter.builder(METRICS_SCAN_PAUSED_FOR_MEM)
 +        .description("scan paused due to server being low on memory").register(registry);
 +    earlyReturnForMemory = Counter.builder(METRICS_SCAN_RETURN_FOR_MEM)
 +        .description("scan returned results early due to server being low on memory")
 +        .register(registry);
    }
  
  }
diff --cc test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
index 4b92480f19,55622d0793..f2be6e71a5
--- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
@@@ -99,10 -99,12 +99,13 @@@ public class MetricsIT extends Configur
      doWorkToGenerateMetrics();
      cluster.stop();
  
--    Set<String> unexpectedMetrics = Set.of(METRICS_SCAN_YIELDS, METRICS_UPDATE_ERRORS,
-         METRICS_COMPACTOR_MAJC_STUCK, METRICS_SCAN_BUSY_TIMEOUT, METRICS_SCAN_PAUSED_FOR_MEM,
-         METRICS_SCAN_RETURN_FOR_MEM, METRICS_MINC_PAUSED, METRICS_MAJC_PAUSED);
-     Set<String> flakyMetrics = Set.of(METRICS_GC_WAL_ERRORS, METRICS_FATE_TYPE_IN_PROGRESS);
 -        METRICS_REPLICATION_QUEUE, METRICS_COMPACTOR_MAJC_STUCK, METRICS_SCAN_BUSY_TIMEOUT_COUNTER);
 -    // add sserver as flaky until scan server included in mini tests.
++    Set<String> unexpectedMetrics =
++        Set.of(METRICS_SCAN_YIELDS, METRICS_UPDATE_ERRORS, METRICS_COMPACTOR_MAJC_STUCK,
++            METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_PAUSED_FOR_MEM,
++            METRICS_SCAN_RETURN_FOR_MEM, METRICS_MINC_PAUSED, METRICS_MAJC_PAUSED);
+     Set<String> flakyMetrics = Set.of(METRICS_GC_WAL_ERRORS, METRICS_FATE_TYPE_IN_PROGRESS,
+         METRICS_SCAN_BUSY_TIMEOUT_COUNTER, METRICS_SCAN_RESERVATION_TIMER,
+         METRICS_SCAN_TABLET_METADATA_CACHE);
  
      Map<String,String> expectedMetricNames = this.getMetricFields();
      flakyMetrics.forEach(expectedMetricNames::remove); // might not see these

Reply via email to