This is an automated email from the ASF dual-hosted git repository. domgarguilo pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit 7f40306d968f0d76ab1d9adeadbd481ea24f099d Merge: 816fa97cbf 960cee8d45 Author: Dom Garguilo <domgargu...@apache.org> AuthorDate: Mon Sep 23 15:24:45 2024 -0400 Merge remote-tracking branch 'upstream/3.1' .../org/apache/accumulo/core/metrics/Metric.java | 231 ++++++++++++--------- .../server/compaction/PausedCompactionMetrics.java | 4 +- .../accumulo/server/metrics/ProcessMetrics.java | 7 +- .../accumulo/server/metrics/ThriftMetrics.java | 7 +- .../org/apache/accumulo/compactor/Compactor.java | 8 +- .../org/apache/accumulo/gc/metrics/GcMetrics.java | 30 ++- .../accumulo/manager/metrics/BalancerMetrics.java | 2 +- .../apache/accumulo/tserver/BlockCacheMetrics.java | 18 +- .../apache/accumulo/tserver/ScanServerMetrics.java | 10 +- .../tserver/metrics/TabletServerMetrics.java | 39 ++-- .../tserver/metrics/TabletServerMinCMetrics.java | 6 +- .../tserver/metrics/TabletServerScanMetrics.java | 35 ++-- .../tserver/metrics/TabletServerUpdateMetrics.java | 16 +- 13 files changed, 222 insertions(+), 191 deletions(-) diff --cc core/src/main/java/org/apache/accumulo/core/metrics/Metric.java index 7252b237ef,27b5e3bf00..d9f3791964 --- a/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java +++ b/core/src/main/java/org/apache/accumulo/core/metrics/Metric.java @@@ -31,24 -31,14 +31,26 @@@ public enum Metric MetricCategory.GENERAL_SERVER), // Compactor Metrics - COMPACTOR_MAJC_STUCK("accumulo.compactor.majc.stuck", MetricType.LONG_TASK_TIMER, "", - MetricCategory.COMPACTOR), + COMPACTOR_MAJC_STUCK("accumulo.compactor.majc.stuck", MetricType.LONG_TASK_TIMER, + "Number and duration of stuck major compactions.", MetricCategory.COMPACTOR), COMPACTOR_ENTRIES_READ("accumulo.compactor.entries.read", MetricType.FUNCTION_COUNTER, - "Number of entries read by all threads performing compactions.", MetricCategory.COMPACTOR), + "Number of entries read by all compactions that have run on this compactor.", + MetricCategory.COMPACTOR), COMPACTOR_ENTRIES_WRITTEN("accumulo.compactor.entries.written", MetricType.FUNCTION_COUNTER, - "Number of entries written by all threads performing compactions.", MetricCategory.COMPACTOR), + "Number of entries written by all compactions that have run on this compactor.", + MetricCategory.COMPACTOR), + COMPACTOR_JOB_PRIORITY_QUEUES("accumulo.compactor.queue.count", MetricType.GAUGE, + "Number of priority queues for compaction jobs.", MetricCategory.COMPACTOR), + COMPACTOR_JOB_PRIORITY_QUEUE_LENGTH("accumulo.compactor.queue.length", MetricType.GAUGE, "", + MetricCategory.COMPACTOR), + COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_DEQUEUED("accumulo.compactor.queue.jobs.dequeued", + MetricType.GAUGE, "", MetricCategory.COMPACTOR), + COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_QUEUED("accumulo.compactor.queue.jobs.queued", MetricType.GAUGE, + "", MetricCategory.COMPACTOR), + COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_REJECTED("accumulo.compactor.queue.jobs.rejected", + MetricType.GAUGE, "", MetricCategory.COMPACTOR), + COMPACTOR_JOB_PRIORITY_QUEUE_JOBS_PRIORITY("accumulo.compactor.queue.jobs.priority", + MetricType.GAUGE, "", MetricCategory.COMPACTOR), // Fate Metrics FATE_TYPE_IN_PROGRESS("accumulo.fate.ops.in.progress.by.type", MetricType.GAUGE, @@@ -63,57 -56,72 +68,77 @@@ MetricCategory.FATE), // Garbage Collection Metrics - GC_STARTED("accumulo.gc.started", MetricType.GAUGE, "", MetricCategory.GARBAGE_COLLECTION), - GC_FINISHED("accumulo.gc.finished", MetricType.GAUGE, "", MetricCategory.GARBAGE_COLLECTION), - GC_CANDIDATES("accumulo.gc.candidates", MetricType.GAUGE, "", MetricCategory.GARBAGE_COLLECTION), - GC_IN_USE("accumulo.gc.in.use", MetricType.GAUGE, "", MetricCategory.GARBAGE_COLLECTION), - GC_DELETED("accumulo.gc.deleted", MetricType.GAUGE, "", MetricCategory.GARBAGE_COLLECTION), - GC_ERRORS("accumulo.gc.errors", MetricType.GAUGE, "", MetricCategory.GARBAGE_COLLECTION), - GC_WAL_STARTED("accumulo.gc.wal.started", MetricType.GAUGE, "", + GC_STARTED("accumulo.gc.started", MetricType.GAUGE, "Timestamp GC file collection cycle started.", + MetricCategory.GARBAGE_COLLECTION), + GC_FINISHED("accumulo.gc.finished", MetricType.GAUGE, "Timestamp GC file collect cycle finished.", + MetricCategory.GARBAGE_COLLECTION), + GC_CANDIDATES("accumulo.gc.candidates", MetricType.GAUGE, + "Number of files that are candidates for deletion.", MetricCategory.GARBAGE_COLLECTION), + GC_IN_USE("accumulo.gc.in.use", MetricType.GAUGE, "Number of candidate files still in use.", MetricCategory.GARBAGE_COLLECTION), - GC_WAL_FINISHED("accumulo.gc.wal.finished", MetricType.GAUGE, "", + GC_DELETED("accumulo.gc.deleted", MetricType.GAUGE, "Number of candidate files deleted.", MetricCategory.GARBAGE_COLLECTION), - GC_WAL_CANDIDATES("accumulo.gc.wal.candidates", MetricType.GAUGE, "", + GC_ERRORS("accumulo.gc.errors", MetricType.GAUGE, "Number of candidate deletion errors.", MetricCategory.GARBAGE_COLLECTION), - GC_WAL_IN_USE("accumulo.gc.wal.in.use", MetricType.GAUGE, "", MetricCategory.GARBAGE_COLLECTION), - GC_WAL_DELETED("accumulo.gc.wal.deleted", MetricType.GAUGE, "", + GC_WAL_STARTED("accumulo.gc.wal.started", MetricType.GAUGE, + "Timestamp GC WAL collection cycle started.", MetricCategory.GARBAGE_COLLECTION), + GC_WAL_FINISHED("accumulo.gc.wal.finished", MetricType.GAUGE, + "Timestamp GC WAL collect cycle finished.", MetricCategory.GARBAGE_COLLECTION), + GC_WAL_CANDIDATES("accumulo.gc.wal.candidates", MetricType.GAUGE, + "Number of files that are candidates for deletion.", MetricCategory.GARBAGE_COLLECTION), + GC_WAL_IN_USE("accumulo.gc.wal.in.use", MetricType.GAUGE, + "Number of wal file candidates that are still in use.", MetricCategory.GARBAGE_COLLECTION), + GC_WAL_DELETED("accumulo.gc.wal.deleted", MetricType.GAUGE, + "Number of candidate wal files deleted.", MetricCategory.GARBAGE_COLLECTION), + GC_WAL_ERRORS("accumulo.gc.wal.errors", MetricType.GAUGE, + "Number candidate wal file deletion errors.", MetricCategory.GARBAGE_COLLECTION), + GC_POST_OP_DURATION("accumulo.gc.post.op.duration", MetricType.GAUGE, + "GC metadata table post operation duration in milliseconds.", MetricCategory.GARBAGE_COLLECTION), - GC_WAL_ERRORS("accumulo.gc.wal.errors", MetricType.GAUGE, "", MetricCategory.GARBAGE_COLLECTION), - GC_POST_OP_DURATION("accumulo.gc.post.op.duration", MetricType.GAUGE, "", + GC_RUN_CYCLE("accumulo.gc.run.cycle", MetricType.GAUGE, + "Count of gc cycle runs. Value is reset on process start.", MetricCategory.GARBAGE_COLLECTION), - GC_RUN_CYCLE("accumulo.gc.run.cycle", MetricType.GAUGE, "", MetricCategory.GARBAGE_COLLECTION), // Tablet Server Metrics - TSERVER_ENTRIES("accumulo.tserver.entries", MetricType.GAUGE, "", MetricCategory.TABLET_SERVER), - TSERVER_MEM_ENTRIES("accumulo.tserver.entries.mem", MetricType.GAUGE, "", - MetricCategory.TABLET_SERVER), - TSERVER_MINC_QUEUED("accumulo.tserver.minc.queued", MetricType.GAUGE, "", - MetricCategory.TABLET_SERVER), - TSERVER_MINC_RUNNING("accumulo.tserver.minc.running", MetricType.GAUGE, "", - MetricCategory.TABLET_SERVER), - TSERVER_MINC_TOTAL("accumulo.tserver.minc.total", MetricType.GAUGE, "", - MetricCategory.TABLET_SERVER), - TSERVER_TABLETS_ONLINE("accumulo.tserver.tablets.online", MetricType.GAUGE, "", - MetricCategory.TABLET_SERVER), + TSERVER_ENTRIES("accumulo.tserver.entries", MetricType.GAUGE, "Number of entries.", + MetricCategory.TABLET_SERVER), + TSERVER_MEM_ENTRIES("accumulo.tserver.entries.mem", MetricType.GAUGE, + "Number of entries in memory.", MetricCategory.TABLET_SERVER), + TSERVER_MAJC_RUNNING("accumulo.tserver.majc.running", MetricType.GAUGE, + "Number of active major compactions.", MetricCategory.TABLET_SERVER), + TSERVER_MAJC_STUCK("accumulo.tserver.majc.stuck", MetricType.GAUGE, + "Number and duration of stuck major compactions.", MetricCategory.TABLET_SERVER), + TSERVER_MAJC_QUEUED("accumulo.tserver.majc.queued", MetricType.GAUGE, + "Number of queued major compactions.", MetricCategory.TABLET_SERVER), + TSERVER_MINC_QUEUED("accumulo.tserver.minc.queued", MetricType.GAUGE, + "Number of queued minor compactions.", MetricCategory.TABLET_SERVER), + TSERVER_MINC_RUNNING("accumulo.tserver.minc.running", MetricType.GAUGE, + "Number of active minor compactions.", MetricCategory.TABLET_SERVER), + TSERVER_MINC_TOTAL("accumulo.tserver.minc.total", MetricType.GAUGE, + "Total number of minor compactions performed.", MetricCategory.TABLET_SERVER), + TSERVER_TABLETS_ONLINE("accumulo.tserver.tablets.online", MetricType.GAUGE, + "Number of online tablets.", MetricCategory.TABLET_SERVER), TSERVER_TABLETS_LONG_ASSIGNMENTS("accumulo.tserver.tablets.assignments.warning", MetricType.GAUGE, - "", MetricCategory.TABLET_SERVER), - TSERVER_TABLETS_OPENING("accumulo.tserver.tablets.opening", MetricType.GAUGE, "", - MetricCategory.TABLET_SERVER), - TSERVER_TABLETS_UNOPENED("accumulo.tserver.tablets.unopened", MetricType.GAUGE, "", - MetricCategory.TABLET_SERVER), - TSERVER_TABLETS_FILES("accumulo.tserver.tablets.files", MetricType.GAUGE, "", - MetricCategory.TABLET_SERVER), + "Number of tablet assignments that are taking longer than the configured warning duration.", + MetricCategory.TABLET_SERVER), + TSERVER_TABLETS_OPENING("accumulo.tserver.tablets.opening", MetricType.GAUGE, + "Number of opening tablets.", MetricCategory.TABLET_SERVER), + TSERVER_TABLETS_UNOPENED("accumulo.tserver.tablets.unopened", MetricType.GAUGE, + "Number of unopened tablets.", MetricCategory.TABLET_SERVER), + TSERVER_TABLETS_FILES("accumulo.tserver.tablets.files", MetricType.GAUGE, + "Number of files per tablet.", MetricCategory.TABLET_SERVER), TSERVER_INGEST_MUTATIONS("accumulo.tserver.ingest.mutations", MetricType.GAUGE, - "Prior to 2.1.0 this metric was reported as a rate, it is now the count and the rate can be derived.", + "Ingest mutation count. The rate can be derived from this metric.", MetricCategory.TABLET_SERVER), TSERVER_INGEST_BYTES("accumulo.tserver.ingest.bytes", MetricType.GAUGE, - "Prior to 2.1.0 this metric was reported as a rate, it is now the count and the rate can be derived.", - MetricCategory.TABLET_SERVER), - TSERVER_HOLD("accumulo.tserver.hold", MetricType.GAUGE, "", MetricCategory.TABLET_SERVER), - TSERVER_TABLETS_ONLINE_ONDEMAND("accumulo.tserver.tablets.ondemand.online", MetricType.GAUGE, "", - MetricCategory.TABLET_SERVER), + "Ingest byte count. The rate can be derived from this metric.", MetricCategory.TABLET_SERVER), + TSERVER_HOLD("accumulo.tserver.hold", MetricType.GAUGE, + "Duration for which commits have been held in milliseconds.", MetricCategory.TABLET_SERVER), ++ TSERVER_TABLETS_ONLINE_ONDEMAND("accumulo.tserver.tablets.ondemand.online", MetricType.GAUGE, ++ "Number of online on-demand tablets", MetricCategory.TABLET_SERVER), + TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM("accumulo.tserver.tablets.ondemand.unloaded.lowmem", - MetricType.GAUGE, "", MetricCategory.TABLET_SERVER), ++ MetricType.GAUGE, "Number of online on-demand tablets unloaded due to low memory", ++ MetricCategory.TABLET_SERVER), // Scan Metrics SCAN_RESERVATION_TOTAL_TIMER("accumulo.scan.reservation.total.timer", MetricType.TIMER, diff --cc server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerMetrics.java index 76753b2c54,134c604a67..68798e6d67 --- a/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerMetrics.java +++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/metrics/TabletServerMetrics.java @@@ -72,41 -71,37 +71,39 @@@ public class TabletServerMetrics implem FunctionCounter .builder(COMPACTOR_ENTRIES_WRITTEN.getName(), this, TabletServerMetrics::getTotalEntriesWritten) - .description("Number of entries written by all compactions that have run on this tserver") - .register(registry); + .description(COMPACTOR_ENTRIES_WRITTEN.getDescription()).register(registry); - LongTaskTimer timer = LongTaskTimer.builder(TSERVER_MAJC_STUCK.getName()) - .description(TSERVER_MAJC_STUCK.getDescription()).register(registry); + LongTaskTimer timer = LongTaskTimer.builder(COMPACTOR_MAJC_STUCK.getName()) - .description("Number and duration of stuck major compactions").register(registry); ++ .description(COMPACTOR_MAJC_STUCK.getDescription()).register(registry); CompactionWatcher.setTimer(timer); + Gauge .builder(TSERVER_TABLETS_LONG_ASSIGNMENTS.getName(), util, TabletServerMetricsUtil::getLongTabletAssignments) - .description("Number of tablet assignments that are taking a long time").register(registry); + .description(TSERVER_TABLETS_LONG_ASSIGNMENTS.getDescription()).register(registry); Gauge.builder(TSERVER_ENTRIES.getName(), util, TabletServerMetricsUtil::getEntries) - .description("Number of entries").register(registry); + .description(TSERVER_ENTRIES.getDescription()).register(registry); Gauge.builder(TSERVER_MEM_ENTRIES.getName(), util, TabletServerMetricsUtil::getEntriesInMemory) - .description("Number of entries in memory").register(registry); + .description(TSERVER_MEM_ENTRIES.getDescription()).register(registry); - Gauge - .builder(TSERVER_MAJC_RUNNING.getName(), util, TabletServerMetricsUtil::getMajorCompactions) - .description(TSERVER_MAJC_RUNNING.getDescription()).register(registry); - Gauge - .builder(TSERVER_MAJC_QUEUED.getName(), util, - TabletServerMetricsUtil::getMajorCompactionsQueued) - .description(TSERVER_MINC_QUEUED.getDescription()).register(registry); Gauge .builder(TSERVER_MINC_RUNNING.getName(), util, TabletServerMetricsUtil::getMinorCompactions) - .description("Number of active minor compactions").register(registry); + .description(TSERVER_MINC_RUNNING.getDescription()).register(registry); Gauge .builder(TSERVER_MINC_QUEUED.getName(), util, TabletServerMetricsUtil::getMinorCompactionsQueued) - .description("Number of queued minor compactions").register(registry); + .description(TSERVER_MINC_QUEUED.getDescription()).register(registry); + Gauge + .builder(TSERVER_TABLETS_ONLINE_ONDEMAND.getName(), util, + TabletServerMetricsUtil::getOnDemandOnlineCount) - .description("Number of online on-demand tablets").register(registry); ++ .description(TSERVER_TABLETS_ONLINE_ONDEMAND.getDescription()).register(registry); + Gauge + .builder(TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM.getName(), util, + TabletServerMetricsUtil::getOnDemandUnloadedLowMem) - .description("Number of online on-demand tablets unloaded due to low memory") - .register(registry); ++ .description(TSERVER_TABLETS_ONDEMAND_UNLOADED_FOR_MEM.getDescription()).register(registry); Gauge.builder(TSERVER_TABLETS_ONLINE.getName(), util, TabletServerMetricsUtil::getOnlineCount) - .description("Number of online tablets").register(registry); + .description(TSERVER_TABLETS_ONLINE.getDescription()).register(registry); Gauge.builder(TSERVER_TABLETS_OPENING.getName(), util, TabletServerMetricsUtil::getOpeningCount) - .description("Number of opening tablets").register(registry); + .description(TSERVER_TABLETS_OPENING.getDescription()).register(registry); Gauge .builder(TSERVER_TABLETS_UNOPENED.getName(), util, TabletServerMetricsUtil::getUnopenedCount)