This is an automated email from the ASF dual-hosted git repository.

rongr pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new e62db612c9 Metrics for Table Disabled and Consumption Paused (#12000) e62db612c9 is described below commit e62db612c91b4b540bcdfeecb412442eac35ced2 Author: Prashant Pandey <84911643+suddend...@users.noreply.github.com> AuthorDate: Thu Dec 7 21:21:50 2023 +0530 Metrics for Table Disabled and Consumption Paused (#12000) --- .../configs/controller.yml | 12 ++++++++ .../pinot/common/metrics/ControllerGauge.java | 6 +++- .../controller/helix/SegmentStatusChecker.java | 35 ++++++++++++++++++++-- .../realtime/PinotLLCRealtimeSegmentManager.java | 6 ++-- 4 files changed, 52 insertions(+), 7 deletions(-) diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml index c4071887ed..e86243dfc1 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml @@ -204,6 +204,18 @@ rules: cache: true labels: version: "$2" +- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.tableConsumptionPaused.([^\\.]*?)_(OFFLINE|REALTIME)\"><>(\\w+)" + name: "pinot_controller_tableConsumptionPaused_$3" + cache: true + labels: + tableName: "$1" + tableType: "$2" +- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.tableDisabled.([^\\.]*?)_(OFFLINE|REALTIME)\"><>(\\w+)" + name: "pinot_controller_tableDisabled_$3" + cache: true + labels: + tableName: "$1" + tableType: "$2" ## Metrics that fit the catch-all patterns above should not be added to this file. 
## In case a metric does not fit the catch-all patterns, add them before this comment diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java index 4006ca45b0..3444ffae5f 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java @@ -149,7 +149,11 @@ public enum ControllerGauge implements AbstractMetrics.Gauge { // Number of tables that we want to fix but failed to update table config FAILED_TO_UPDATE_TABLE_CONFIG_COUNT("failedToUpdateTableConfigCount", true), - LLC_SEGMENTS_DEEP_STORE_UPLOAD_RETRY_QUEUE_SIZE("LLCSegmentDeepStoreUploadRetryQueueSize", false); + LLC_SEGMENTS_DEEP_STORE_UPLOAD_RETRY_QUEUE_SIZE("LLCSegmentDeepStoreUploadRetryQueueSize", false), + + TABLE_CONSUMPTION_PAUSED("tableConsumptionPaused", false), + + TABLE_DISABLED("tableDisabled", false); private final String _gaugeName; private final String _unit; diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java index f64e6c3e75..617564757e 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java @@ -44,6 +44,7 @@ import org.apache.pinot.controller.LeadControllerManager; import org.apache.pinot.controller.helix.core.PinotHelixResourceManager; import org.apache.pinot.controller.helix.core.periodictask.ControllerPeriodicTask; import org.apache.pinot.controller.helix.core.realtime.MissingConsumingSegmentFinder; +import org.apache.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager; import org.apache.pinot.controller.util.TableSizeReader; import 
org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableType; @@ -133,7 +134,24 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh protected void postprocess(Context context) { _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.REALTIME_TABLE_COUNT, context._realTimeTableCount); _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.OFFLINE_TABLE_COUNT, context._offlineTableCount); - _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.DISABLED_TABLE_COUNT, context._disabledTableCount); + _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.DISABLED_TABLE_COUNT, context._disabledTables.size()); + + //emit a 0 for tables that are not paused/disabled. This makes alert expressions simpler as we don't have to deal + // with missing metrics + context._processedTables.forEach(tableNameWithType -> { + if (context._pausedTables.contains(tableNameWithType)) { + _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED, 1); + } else { + _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED, 0); + } + }); + context._processedTables.forEach(tableNameWithType -> { + if (context._disabledTables.contains(tableNameWithType)) { + _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED, 1); + } else { + _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED, 0); + } + }); // Remove metrics for tables that are no longer in the cluster _cachedTableNamesWithType.removeAll(context._processedTables); @@ -186,10 +204,18 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh LOGGER.warn("Table {} is disabled. 
Skipping segment status checks", tableNameWithType); } resetTableMetrics(tableNameWithType); - context._disabledTableCount++; + context._disabledTables.add(tableNameWithType); return; } + //check if table consumption is paused + boolean isTablePaused = + Boolean.parseBoolean(idealState.getRecord().getSimpleField(PinotLLCRealtimeSegmentManager.IS_TABLE_PAUSED)); + + if (isTablePaused) { + context._pausedTables.add(tableNameWithType); + } + if (idealState.getPartitionSet().isEmpty()) { int nReplicasFromIdealState = 1; try { @@ -335,6 +361,8 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.SEGMENTS_IN_ERROR_STATE); _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE); + _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED); + _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED); } private void setStatusToDefault() { @@ -367,7 +395,8 @@ public class SegmentStatusChecker extends ControllerPeriodicTask<SegmentStatusCh private boolean _logDisabledTables; private int _realTimeTableCount; private int _offlineTableCount; - private int _disabledTableCount; private Set<String> _processedTables = new HashSet<>(); + private Set<String> _disabledTables = new HashSet<>(); + private Set<String> _pausedTables = new HashSet<>(); } } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java index 74eb758b26..298b16d605 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java @@ -129,14 
+129,14 @@ import org.slf4j.LoggerFactory; * TODO: migrate code in this class to other places for better readability */ public class PinotLLCRealtimeSegmentManager { + + // simple field in Ideal State representing pause status for the table + public static final String IS_TABLE_PAUSED = "isTablePaused"; private static final Logger LOGGER = LoggerFactory.getLogger(PinotLLCRealtimeSegmentManager.class); private static final int STARTING_SEQUENCE_NUMBER = 0; // Initial sequence number for new table segments private static final String METADATA_EVENT_NOTIFIER_PREFIX = "metadata.event.notifier"; - // simple field in Ideal State representing pause status for the table - private static final String IS_TABLE_PAUSED = "isTablePaused"; - // Max time to wait for all LLC segments to complete committing their metadata while stopping the controller. private static final long MAX_LLC_SEGMENT_METADATA_COMMIT_TIME_MILLIS = 30_000L; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org