This is an automated email from the ASF dual-hosted git repository.

domgarguilo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit 7f40306d968f0d76ab1d9adeadbd481ea24f099d
Merge: 816fa97cbf 960cee8d45
Author: Dom Garguilo <domgargu...@apache.org>
AuthorDate: Mon Sep 23 15:24:45 2024 -0400

    Merge remote-tracking branch 'upstream/3.1'

 .../org/apache/accumulo/core/metrics/Metric.java   | 231 ++++++++++++---------
 .../server/compaction/PausedCompactionMetrics.java |   4 +-
 .../accumulo/server/metrics/ProcessMetrics.java    |   7 +-
 .../accumulo/server/metrics/ThriftMetrics.java     |   7 +-
 .../org/apache/accumulo/compactor/Compactor.java   |   8 +-
 .../org/apache/accumulo/gc/metrics/GcMetrics.java  |  30 ++-
 .../accumulo/manager/metrics/BalancerMetrics.java  |   2 +-
 .../apache/accumulo/tserver/BlockCacheMetrics.java |  18 +-
 .../apache/accumulo/tserver/ScanServerMetrics.java |  10 +-
 .../tserver/metrics/TabletServerMetrics.java       |  39 ++--
 .../tserver/metrics/TabletServerMinCMetrics.java   |   6 +-
 .../tserver/metrics/TabletServerScanMetrics.java   |  35 ++--
 .../tserver/metrics/TabletServerUpdateMetrics.java |  16 +-
 13 files changed, 222 insertions(+), 191 deletions(-)

diff --cc core/src/main/java/org/apache/accumulo/core/metrics/Metric.java
index 7252b237ef,27b5e3bf00..d9f3791964
--- a/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java
@@@ -31,24 -31,14 +31,26 @@@ public enum Metric 
        MetricCategory.GENERAL_SERVER),
  
    // Compactor Metrics
-   COMPACTOR_MAJC_STUCK("accumulo.compactor.majc.stuck", MetricType.LONG_TASK_TIMER, "",
-       MetricCategory.COMPACTOR),
+   COMPACTOR_MAJC_STUCK("accumulo.compactor.majc.stuck", MetricType.LONG_TASK_TIMER,
+       "Number and duration of stuck major compactions.", MetricCategory.COMPACTOR),
    COMPACTOR_ENTRIES_READ("accumulo.compactor.entries.read", MetricType.FUNCTION_COUNTER,
-       "Number of entries read by all threads performing compactions.", MetricCategory.COMPACTOR),
+       "Number of entries read by all compactions that have run on this compactor.",
+       MetricCategory.COMPACTOR),
    COMPACTOR_ENTRIES_WRITTEN("accumulo.compactor.entries.written", MetricType.FUNCTION_COUNTER,
-       "Number of entries written by all threads performing compactions.", MetricCategory.COMPACTOR),
+       "Number of entries written by all compactions that have run on this compactor.",
+       MetricCategory.COMPACTOR),
 +  COMPACTOR_JOB_PRIORITY_QUEUES("accumulo.compactor.queue.count", MetricType.GAUGE,
 +      "Number of priority queues for compaction jobs.", MetricCategory.COMPACTOR),
 +  COMPACTOR_JOB_PRIORITY_QUEUE_LENGTH("accumulo.compactor.queue.length", MetricType.GAUGE, "",
 +      MetricCategory.COMPACTOR),
 +  COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_DEQUEUED("accumulo.compactor.queue.jobs.dequeued",
 +      MetricType.GAUGE, "", MetricCategory.COMPACTOR),
 +  COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_QUEUED("accumulo.compactor.queue.jobs.queued", MetricType.GAUGE,
 +      "", MetricCategory.COMPACTOR),
 +  COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_REJECTED("accumulo.compactor.queue.jobs.rejected",
 +      MetricType.GAUGE, "", MetricCategory.COMPACTOR),
 +  COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_PRIORITY("accumulo.compactor.queue.jobs.priority",
 +      MetricType.GAUGE, "", MetricCategory.COMPACTOR),
  
    // Fate Metrics
    FATE_TYPE_IN_PROGRESS("accumulo.fate.ops.in.progress.by.type", 
MetricType.GAUGE,
@@@ -63,57 -56,72 +68,77 @@@
        MetricCategory.FATE),
  
    // Garbage Collection Metrics
-   GC_STARTED("accumulo.gc.started", MetricType.GAUGE, "", 
MetricCategory.GARBAGE_COLLECTION),
-   GC_FINISHED("accumulo.gc.finished", MetricType.GAUGE, "", 
MetricCategory.GARBAGE_COLLECTION),
-   GC_CANDIDATES("accumulo.gc.candidates", MetricType.GAUGE, "", 
MetricCategory.GARBAGE_COLLECTION),
-   GC_IN_USE("accumulo.gc.in.use", MetricType.GAUGE, "", 
MetricCategory.GARBAGE_COLLECTION),
-   GC_DELETED("accumulo.gc.deleted", MetricType.GAUGE, "", 
MetricCategory.GARBAGE_COLLECTION),
-   GC_ERRORS("accumulo.gc.errors", MetricType.GAUGE, "", 
MetricCategory.GARBAGE_COLLECTION),
-   GC_WAL_STARTED("accumulo.gc.wal.started", MetricType.GAUGE, "",
+   GC_STARTED("accumulo.gc.started", MetricType.GAUGE, "Timestamp GC file 
collection cycle started.",
+       MetricCategory.GARBAGE_COLLECTION),
+   GC_FINISHED("accumulo.gc.finished", MetricType.GAUGE, "Timestamp GC file 
collect cycle finished.",
+       MetricCategory.GARBAGE_COLLECTION),
+   GC_CANDIDATES("accumulo.gc.candidates", MetricType.GAUGE,
+       "Number of files that are candidates for deletion.", 
MetricCategory.GARBAGE_COLLECTION),
+   GC_IN_USE("accumulo.gc.in.use", MetricType.GAUGE, "Number of candidate 
files still in use.",
        MetricCategory.GARBAGE_COLLECTION),
-   GC_WAL_FINISHED("accumulo.gc.wal.finished", MetricType.GAUGE, "",
+   GC_DELETED("accumulo.gc.deleted", MetricType.GAUGE, "Number of candidate 
files deleted.",
        MetricCategory.GARBAGE_COLLECTION),
-   GC_WAL_CANDIDATES("accumulo.gc.wal.candidates", MetricType.GAUGE, "",
+   GC_ERRORS("accumulo.gc.errors", MetricType.GAUGE, "Number of candidate 
deletion errors.",
        MetricCategory.GARBAGE_COLLECTION),
-   GC_WAL_IN_USE("accumulo.gc.wal.in.use", MetricType.GAUGE, "", 
MetricCategory.GARBAGE_COLLECTION),
-   GC_WAL_DELETED("accumulo.gc.wal.deleted", MetricType.GAUGE, "",
+   GC_WAL_STARTED("accumulo.gc.wal.started", MetricType.GAUGE,
+       "Timestamp GC WAL collection cycle started.", 
MetricCategory.GARBAGE_COLLECTION),
+   GC_WAL_FINISHED("accumulo.gc.wal.finished", MetricType.GAUGE,
+       "Timestamp GC WAL collect cycle finished.", 
MetricCategory.GARBAGE_COLLECTION),
+   GC_WAL_CANDIDATES("accumulo.gc.wal.candidates", MetricType.GAUGE,
+       "Number of files that are candidates for deletion.", 
MetricCategory.GARBAGE_COLLECTION),
+   GC_WAL_IN_USE("accumulo.gc.wal.in.use", MetricType.GAUGE,
+       "Number of wal file candidates that are still in use.", 
MetricCategory.GARBAGE_COLLECTION),
+   GC_WAL_DELETED("accumulo.gc.wal.deleted", MetricType.GAUGE,
+       "Number of candidate wal files deleted.", 
MetricCategory.GARBAGE_COLLECTION),
+   GC_WAL_ERRORS("accumulo.gc.wal.errors", MetricType.GAUGE,
+       "Number candidate wal file deletion errors.", 
MetricCategory.GARBAGE_COLLECTION),
+   GC_POST_OP_DURATION("accumulo.gc.post.op.duration", MetricType.GAUGE,
+       "GC metadata table post operation duration in milliseconds.",
        MetricCategory.GARBAGE_COLLECTION),
-   GC_WAL_ERRORS("accumulo.gc.wal.errors", MetricType.GAUGE, "", 
MetricCategory.GARBAGE_COLLECTION),
-   GC_POST_OP_DURATION("accumulo.gc.post.op.duration", MetricType.GAUGE, "",
+   GC_RUN_CYCLE("accumulo.gc.run.cycle", MetricType.GAUGE,
+       "Count of gc cycle runs. Value is reset on process start.",
        MetricCategory.GARBAGE_COLLECTION),
-   GC_RUN_CYCLE("accumulo.gc.run.cycle", MetricType.GAUGE, "", 
MetricCategory.GARBAGE_COLLECTION),
  
    // Tablet Server Metrics
-   TSERVER_ENTRIES("accumulo.tserver.entries", MetricType.GAUGE, "", 
MetricCategory.TABLET_SERVER),
-   TSERVER_MEM_ENTRIES("accumulo.tserver.entries.mem", MetricType.GAUGE, "",
-       MetricCategory.TABLET_SERVER),
-   TSERVER_MINC_QUEUED("accumulo.tserver.minc.queued", MetricType.GAUGE, "",
-       MetricCategory.TABLET_SERVER),
-   TSERVER_MINC_RUNNING("accumulo.tserver.minc.running", MetricType.GAUGE, "",
-       MetricCategory.TABLET_SERVER),
-   TSERVER_MINC_TOTAL("accumulo.tserver.minc.total", MetricType.GAUGE, "",
-       MetricCategory.TABLET_SERVER),
-   TSERVER_TABLETS_ONLINE("accumulo.tserver.tablets.online", MetricType.GAUGE, 
"",
-       MetricCategory.TABLET_SERVER),
+   TSERVER_ENTRIES("accumulo.tserver.entries", MetricType.GAUGE, "Number of 
entries.",
+       MetricCategory.TABLET_SERVER),
+   TSERVER_MEM_ENTRIES("accumulo.tserver.entries.mem", MetricType.GAUGE,
+       "Number of entries in memory.", MetricCategory.TABLET_SERVER),
+   TSERVER_MAJC_RUNNING("accumulo.tserver.majc.running", MetricType.GAUGE,
+       "Number of active major compactions.", MetricCategory.TABLET_SERVER),
+   TSERVER_MAJC_STUCK("accumulo.tserver.majc.stuck", MetricType.GAUGE,
+       "Number and duration of stuck major compactions.", 
MetricCategory.TABLET_SERVER),
+   TSERVER_MAJC_QUEUED("accumulo.tserver.majc.queued", MetricType.GAUGE,
+       "Number of queued major compactions.", MetricCategory.TABLET_SERVER),
+   TSERVER_MINC_QUEUED("accumulo.tserver.minc.queued", MetricType.GAUGE,
+       "Number of queued minor compactions.", MetricCategory.TABLET_SERVER),
+   TSERVER_MINC_RUNNING("accumulo.tserver.minc.running", MetricType.GAUGE,
+       "Number of active minor compactions.", MetricCategory.TABLET_SERVER),
+   TSERVER_MINC_TOTAL("accumulo.tserver.minc.total", MetricType.GAUGE,
+       "Total number of minor compactions performed.", 
MetricCategory.TABLET_SERVER),
+   TSERVER_TABLETS_ONLINE("accumulo.tserver.tablets.online", MetricType.GAUGE,
+       "Number of online tablets.", MetricCategory.TABLET_SERVER),
    TSERVER_TABLETS_LONG_ASSIGNMENTS("accumulo.tserver.tablets.assignments.warning", MetricType.GAUGE,
-       "", MetricCategory.TABLET_SERVER),
-   TSERVER_TABLETS_OPENING("accumulo.tserver.tablets.opening", 
MetricType.GAUGE, "",
-       MetricCategory.TABLET_SERVER),
-   TSERVER_TABLETS_UNOPENED("accumulo.tserver.tablets.unopened", 
MetricType.GAUGE, "",
-       MetricCategory.TABLET_SERVER),
-   TSERVER_TABLETS_FILES("accumulo.tserver.tablets.files", MetricType.GAUGE, 
"",
-       MetricCategory.TABLET_SERVER),
+       "Number of tablet assignments that are taking longer than the 
configured warning duration.",
+       MetricCategory.TABLET_SERVER),
+   TSERVER_TABLETS_OPENING("accumulo.tserver.tablets.opening", 
MetricType.GAUGE,
+       "Number of opening tablets.", MetricCategory.TABLET_SERVER),
+   TSERVER_TABLETS_UNOPENED("accumulo.tserver.tablets.unopened", 
MetricType.GAUGE,
+       "Number of unopened tablets.", MetricCategory.TABLET_SERVER),
+   TSERVER_TABLETS_FILES("accumulo.tserver.tablets.files", MetricType.GAUGE,
+       "Number of files per tablet.", MetricCategory.TABLET_SERVER),
    TSERVER_INGEST_MUTATIONS("accumulo.tserver.ingest.mutations", 
MetricType.GAUGE,
-       "Prior to 2.1.0 this metric was reported as a rate, it is now the count 
and the rate can be derived.",
+       "Ingest mutation count. The rate can be derived from this metric.",
        MetricCategory.TABLET_SERVER),
    TSERVER_INGEST_BYTES("accumulo.tserver.ingest.bytes", MetricType.GAUGE,
-       "Prior to 2.1.0 this metric was reported as a rate, it is now the count 
and the rate can be derived.",
-       MetricCategory.TABLET_SERVER),
-   TSERVER_HOLD("accumulo.tserver.hold", MetricType.GAUGE, "", 
MetricCategory.TABLET_SERVER),
-   TSERVER_TABLETS_ONLINE_ONDEMAND("accumulo.tserver.tablets.ondemand.online", 
MetricType.GAUGE, "",
-       MetricCategory.TABLET_SERVER),
+       "Ingest byte count. The rate can be derived from this metric.", 
MetricCategory.TABLET_SERVER),
+   TSERVER_HOLD("accumulo.tserver.hold", MetricType.GAUGE,
+       "Duration for which commits have been held in milliseconds.", 
MetricCategory.TABLET_SERVER),
++  TSERVER_TABLETS_ONLINE_ONDEMAND("accumulo.tserver.tablets.ondemand.online", MetricType.GAUGE,
++      "Number of online on-demand tablets", MetricCategory.TABLET_SERVER),
 +  TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM("accumulo.tserver.tablets.ondemand.unloaded.lowmem",
-       MetricType.GAUGE, "", MetricCategory.TABLET_SERVER),
++      MetricType.GAUGE, "Number of online on-demand tablets unloaded due to low memory",
++      MetricCategory.TABLET_SERVER),
  
    // Scan Metrics
    SCAN_RESERVATION_TOTAL_TIMER("accumulo.scan.reservation.total.timer", 
MetricType.TIMER,
diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerMetrics.java
index 76753b2c54,134c604a67..68798e6d67
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerMetrics.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerMetrics.java
@@@ -72,41 -71,37 +71,39 @@@ public class TabletServerMetrics implem
      FunctionCounter
          .builder(COMPACTOR_ENTRIES_WRITTEN.getName(), this,
              TabletServerMetrics::getTotalEntriesWritten)
-         .description("Number of entries written by all compactions that have run on this tserver")
-         .register(registry);
+         .description(COMPACTOR_ENTRIES_WRITTEN.getDescription()).register(registry);
 -    LongTaskTimer timer = LongTaskTimer.builder(TSERVER_MAJC_STUCK.getName())
 -        .description(TSERVER_MAJC_STUCK.getDescription()).register(registry);
 +    LongTaskTimer timer = LongTaskTimer.builder(COMPACTOR_MAJC_STUCK.getName())
-         .description("Number and duration of stuck major compactions").register(registry);
++        .description(COMPACTOR_MAJC_STUCK.getDescription()).register(registry);
      CompactionWatcher.setTimer(timer);
 +
      Gauge
          .builder(TSERVER_TABLETS_LONG_ASSIGNMENTS.getName(), util,
              TabletServerMetricsUtil::getLongTabletAssignments)
-         .description("Number of tablet assignments that are taking a long 
time").register(registry);
+         
.description(TSERVER_TABLETS_LONG_ASSIGNMENTS.getDescription()).register(registry);
  
      Gauge.builder(TSERVER_ENTRIES.getName(), util, 
TabletServerMetricsUtil::getEntries)
-         .description("Number of entries").register(registry);
+         .description(TSERVER_ENTRIES.getDescription()).register(registry);
      Gauge.builder(TSERVER_MEM_ENTRIES.getName(), util, 
TabletServerMetricsUtil::getEntriesInMemory)
-         .description("Number of entries in memory").register(registry);
+         .description(TSERVER_MEM_ENTRIES.getDescription()).register(registry);
 -    Gauge
 -        .builder(TSERVER_MAJC_RUNNING.getName(), util, 
TabletServerMetricsUtil::getMajorCompactions)
 -        
.description(TSERVER_MAJC_RUNNING.getDescription()).register(registry);
 -    Gauge
 -        .builder(TSERVER_MAJC_QUEUED.getName(), util,
 -            TabletServerMetricsUtil::getMajorCompactionsQueued)
 -        .description(TSERVER_MINC_QUEUED.getDescription()).register(registry);
      Gauge
          .builder(TSERVER_MINC_RUNNING.getName(), util, 
TabletServerMetricsUtil::getMinorCompactions)
-         .description("Number of active minor compactions").register(registry);
+         
.description(TSERVER_MINC_RUNNING.getDescription()).register(registry);
      Gauge
          .builder(TSERVER_MINC_QUEUED.getName(), util,
              TabletServerMetricsUtil::getMinorCompactionsQueued)
-         .description("Number of queued minor compactions").register(registry);
+         .description(TSERVER_MINC_QUEUED.getDescription()).register(registry);
 +    Gauge
 +        .builder(TSERVER_TABLETS_ONLINE_ONDEMAND.getName(), util,
 +            TabletServerMetricsUtil::getOnDemandOnlineCount)
-         .description("Number of online on-demand tablets").register(registry);
++        
.description(TSERVER_TABLETS_ONLINE_ONDEMAND.getDescription()).register(registry);
 +    Gauge
 +        .builder(TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM.getName(), util,
 +            TabletServerMetricsUtil::getOnDemandUnloadedLowMem)
-         .description("Number of online on-demand tablets unloaded due to low 
memory")
-         .register(registry);
++        
.description(TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM.getDescription()).register(registry);
      Gauge.builder(TSERVER_TABLETS_ONLINE.getName(), util, 
TabletServerMetricsUtil::getOnlineCount)
-         .description("Number of online tablets").register(registry);
+         
.description(TSERVER_TABLETS_ONLINE.getDescription()).register(registry);
      Gauge.builder(TSERVER_TABLETS_OPENING.getName(), util, 
TabletServerMetricsUtil::getOpeningCount)
-         .description("Number of opening tablets").register(registry);
+         
.description(TSERVER_TABLETS_OPENING.getDescription()).register(registry);
      Gauge
          .builder(TSERVER_TABLETS_UNOPENED.getName(), util,
              TabletServerMetricsUtil::getUnopenedCount)

Reply via email to