This is an automated email from the ASF dual-hosted git repository. snlee pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push: new 880a5c779f emit minion task generation time and error metrics (#10026) 880a5c779f is described below commit 880a5c779fc441124ea14013a52d966885a58074 Author: Haitao Zhang <hai...@startree.ai> AuthorDate: Fri Dec 23 14:49:44 2022 -0800 emit minion task generation time and error metrics (#10026) * emit minion task generation time and error metrics * address comments --- .../jmx_prometheus_javaagent/configs/controller.yml | 14 ++++++++++++++ .../etc/jmx_prometheus_javaagent/configs/pinot.yml | 14 ++++++++++++++ .../pinot/common/metrics/ControllerGauge.java | 2 ++ .../helix/core/minion/PinotTaskManager.java | 21 ++++++++++++++++++--- 4 files changed, 48 insertions(+), 3 deletions(-) diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml index edf96a9f2a..d258161b42 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml @@ -119,6 +119,20 @@ rules: table: "$1" tableType: "$2" taskType: "$3" +- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.timeMsSinceLastSuccessfulMinionTaskGeneration.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)" + name: "pinot_controller_timeMsSinceLastSuccessfulMinionTaskGeneration_$4" + cache: true + labels: + table: "$1" + tableType: "$2" + taskType: "$3" +- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.lastMinionTaskGenerationEncountersError.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)" + name: "pinot_controller_lastMinionTaskGenerationEncountersError_$4" + cache: true + labels: + table: "$1" + tableType: "$2" + taskType: "$3" - pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.pinotLeadControllerResourceEnabled\"><>(\\w+)" name: 
"pinot_controller_pinotLeadControllerResourceEnabled_$1" cache: true diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml index 6a63001bfc..27a78730db 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml @@ -108,6 +108,20 @@ rules: table: "$1" tableType: "$2" taskType: "$3" +- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.timeMsSinceLastSuccessfulMinionTaskGeneration.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)" + name: "pinot_controller_timeMsSinceLastSuccessfulMinionTaskGeneration_$4" + cache: true + labels: + table: "$1" + tableType: "$2" + taskType: "$3" +- pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.lastMinionTaskGenerationEncountersError.(\\w+)_(\\w+)\\.(\\w+)\"><>(\\w+)" + name: "pinot_controller_lastMinionTaskGenerationEncountersError_$4" + cache: true + labels: + table: "$1" + tableType: "$2" + taskType: "$3" - pattern: "\"org.apache.pinot.common.metrics\"<type=\"ControllerMetrics\", name=\"pinot.controller.pinotLeadControllerResourceEnabled\"><>(\\w+)" name: "pinot_controller_pinotLeadControllerResourceEnabled_$1" cache: true diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java index 5df7959d5c..b44bd8e999 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java @@ -53,6 +53,8 @@ public enum ControllerGauge implements AbstractMetrics.Gauge { DISABLED_TABLE_COUNT("TableCount", true), PERIODIC_TASK_NUM_TABLES_PROCESSED("PeriodicTaskNumTablesProcessed", true), 
TIME_MS_SINCE_LAST_MINION_TASK_METADATA_UPDATE("TimeMsSinceLastMinionTaskMetadataUpdate", false), + TIME_MS_SINCE_LAST_SUCCESSFUL_MINION_TASK_GENERATION("TimeMsSinceLastSuccessfulMinionTaskGeneration", false), + LAST_MINION_TASK_GENERATION_ENCOUNTERS_ERROR("LastMinionTaskGenerationEncountersError", false), NUM_MINION_TASKS_IN_PROGRESS("NumMinionTasksInProgress", true), NUM_MINION_SUBTASKS_WAITING("NumMinionSubtasksWaiting", true), NUM_MINION_SUBTASKS_RUNNING("NumMinionSubtasksRunning", true), diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java index e8bb3cffd5..04ba148adb 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java @@ -541,20 +541,35 @@ public class PinotTaskManager extends ControllerPeriodicTask<Void> { generateTasks() return a list of TaskGeneratorMostRecentRunInfo for each table */ pinotTaskConfigs = taskGenerator.generateTasks(enabledTableConfigs); + long successRunTimestamp = System.currentTimeMillis(); for (TableConfig tableConfig : enabledTableConfigs) { _taskManagerStatusCache.saveTaskGeneratorInfo(tableConfig.getTableName(), taskGenerator.getTaskType(), - taskGeneratorMostRecentRunInfo -> taskGeneratorMostRecentRunInfo.addSuccessRunTs( - System.currentTimeMillis())); + taskGeneratorMostRecentRunInfo -> taskGeneratorMostRecentRunInfo.addSuccessRunTs(successRunTimestamp)); + // before the first task schedule, the following two gauge metrics will be empty + // TODO: find a better way to report task generation information + _controllerMetrics.addOrUpdateGauge( + ControllerGauge.TIME_MS_SINCE_LAST_SUCCESSFUL_MINION_TASK_GENERATION.getGaugeName() + "." + + tableConfig.getTableName() + "."
+ taskGenerator.getTaskType(), + () -> System.currentTimeMillis() - successRunTimestamp); + _controllerMetrics.addOrUpdateGauge( + ControllerGauge.LAST_MINION_TASK_GENERATION_ENCOUNTERS_ERROR.getGaugeName() + "." + + tableConfig.getTableName() + "." + taskGenerator.getTaskType(), () -> 0L); } } catch (Exception e) { StringWriter errors = new StringWriter(); try (PrintWriter pw = new PrintWriter(errors)) { e.printStackTrace(pw); } + long successRunTimestamp = System.currentTimeMillis(); for (TableConfig tableConfig : enabledTableConfigs) { _taskManagerStatusCache.saveTaskGeneratorInfo(tableConfig.getTableName(), taskGenerator.getTaskType(), taskGeneratorMostRecentRunInfo -> taskGeneratorMostRecentRunInfo.addErrorRunMessage( - System.currentTimeMillis(), errors.toString())); + successRunTimestamp, errors.toString())); + // before the first task schedule, the following gauge metric will be empty + // TODO: find a better way to report task generation information + _controllerMetrics.addOrUpdateGauge( + ControllerGauge.LAST_MINION_TASK_GENERATION_ENCOUNTERS_ERROR.getGaugeName() + "." + + tableConfig.getTableName() + "." + taskGenerator.getTaskType(), () -> 1L); } throw e; } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org