mqliang commented on a change in pull request #7368: URL: https://github.com/apache/pinot/pull/7368#discussion_r718894821
########## File path: pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/mergerollup/MergeRollupTaskGenerator.java ########## @@ -463,4 +490,71 @@ private long getWatermarkMs(long minStartTimeMs, long bucketMs, String mergeLeve return pinotTaskConfigs; } + + private long getMergeRollupTaskDelayInNumTimeBuckets(long watermarkMs, long bufferTimeMs, long bucketTimeMs) { + if (bufferTimeMs == 0 || watermarkMs == -1) { + return 0; + } + return (long) Math.floor((System.currentTimeMillis() - watermarkMs - bufferTimeMs) / (double) bucketTimeMs); + } + + private void setWatermarkMs(String tableNameWithType, String mergeLevel, long watermarkMs, long bufferTimeMs, + long bucketTimeMs) { + LOGGER.info( + "Setting watermark for table: {} and mergeLevel: {} is {} (watermarkMs={}, bufferTimeMs={}, bucketTimeMs={})", + tableNameWithType, mergeLevel, getMergeRollupTaskDelayInNumTimeBuckets(watermarkMs, bufferTimeMs, bucketTimeMs), + watermarkMs, bucketTimeMs, bucketTimeMs); + + ControllerMetrics controllerMetrics = _clusterInfoAccessor.getControllerMetrics(); + if (controllerMetrics == null) { + return; + } + + PinotMetricsRegistry metricsRegistry = controllerMetrics.getMetricsRegistry(); + if (metricsRegistry == null) { + return; + } + + // Update gauge value that indicates the delay in terms of the number of time buckets. + _mergeRollupWatermarks.computeIfAbsent(tableNameWithType, k -> new HashMap<>()); + Map<String, Long> watermarkForTable = _mergeRollupWatermarks.get(tableNameWithType); + + watermarkForTable.compute(mergeLevel, (k, v) -> { + if (v == null) { + controllerMetrics.addCallbackGaugeIfNeeded(getMetricNameForTaskDelay(tableNameWithType, mergeLevel), + (() -> getMergeRollupTaskDelayInNumTimeBuckets(watermarkForTable.getOrDefault(k, -1L), bufferTimeMs, + bucketTimeMs))); + } Review comment: Do we need to add an `else` block here that removes the old CallbackGauge and adds a new CallbackGauge when `v != null`? 
########## File path: pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/mergerollup/MergeRollupTaskGenerator.java ########## @@ -463,4 +490,71 @@ private long getWatermarkMs(long minStartTimeMs, long bucketMs, String mergeLeve return pinotTaskConfigs; } + + private long getMergeRollupTaskDelayInNumTimeBuckets(long watermarkMs, long bufferTimeMs, long bucketTimeMs) { + if (bufferTimeMs == 0 || watermarkMs == -1) { + return 0; + } + return (long) Math.floor((System.currentTimeMillis() - watermarkMs - bufferTimeMs) / (double) bucketTimeMs); Review comment: Is it possible for `bucketTimeMs` to be 0? That would cause a "divide by zero" exception. ########## File path: pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/mergerollup/MergeRollupTaskGenerator.java ########## @@ -339,6 +351,21 @@ public String getTaskType() { .info("Finished generating task configs for table: {} for task: {}, numTasks: {}", offlineTableName, taskType, pinotTaskConfigsForTable.size()); } + + // Reset watermarks for invalid tables. This covers the metrics clean up when the table is removed or the merge + // config is added and then removed. + LeadControllerManager leadControllerManager = _clusterInfoAccessor.getLeaderControllerManager(); Review comment: L385-L268 can be changed to ``` for (String tableNameWithType : _mergeRollupWatermarks.keySet()) { if (!leadControllerManager.isLeaderForTable(tableNameWithType)) { resetWatermarkMs(tableNameWithType); } } ``` Since _mergeRollupWatermarks is a HashMap, looping over _mergeRollupWatermarks.keySet() will not iterate over duplicate keys, so the `!candidateMergeTables.contains(tableNameWithType)` check is not needed. 
########## File path: pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/mergerollup/MergeRollupTaskGenerator.java ########## @@ -339,6 +351,21 @@ public String getTaskType() { .info("Finished generating task configs for table: {} for task: {}, numTasks: {}", offlineTableName, taskType, pinotTaskConfigsForTable.size()); } + + // Reset watermarks for invalid tables. This covers the metrics clean up when the table is removed or the merge + // config is added and then removed. + LeadControllerManager leadControllerManager = _clusterInfoAccessor.getLeaderControllerManager(); Review comment: L385-L268 can be changed to ``` for (String tableNameWithType : _mergeRollupWatermarks.keySet()) { if (!leadControllerManager.isLeaderForTable(tableNameWithType)) { resetWatermarkMs(tableNameWithType); } } ``` Since _mergeRollupWatermarks is a HashMap, looping over _mergeRollupWatermarks.keySet() will not iterate over duplicate keys, so the `!candidateMergeTables.contains(tableNameWithType)` check is not needed. ########## File path: pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/mergerollup/MergeRollupTaskGenerator.java ########## @@ -339,6 +351,21 @@ public String getTaskType() { .info("Finished generating task configs for table: {} for task: {}, numTasks: {}", offlineTableName, taskType, pinotTaskConfigsForTable.size()); } + + // Reset watermarks for invalid tables. This covers the metrics clean up when the table is removed or the merge + // config is added and then removed. 
+ LeadControllerManager leadControllerManager = _clusterInfoAccessor.getLeaderControllerManager(); Review comment: L385-L268 can be changed as ``` for (String tableNameWithType : _mergeRollupWatermarks.keySet()) { if (!leadControllerManager.isLeaderForTable(tableNameWithType)) { resetWatermarkMs(tableNameWithType); } } ``` Since _mergeRollupWatermarks is a HashMap, looping over _mergeRollupWatermarks.keySet() will not produce duplicate keys, so the `!candidateMergeTables.contains(tableNameWithType)` check is not needed. ########## File path: pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/mergerollup/MergeRollupTaskGenerator.java ########## @@ -463,4 +490,71 @@ private long getWatermarkMs(long minStartTimeMs, long bucketMs, String mergeLeve return pinotTaskConfigs; } + + private long getMergeRollupTaskDelayInNumTimeBuckets(long watermarkMs, long bufferTimeMs, long bucketTimeMs) { + if (bufferTimeMs == 0 || watermarkMs == -1) { + return 0; + } + return (long) Math.floor((System.currentTimeMillis() - watermarkMs - bufferTimeMs) / (double) bucketTimeMs); + } + + private void setWatermarkMs(String tableNameWithType, String mergeLevel, long watermarkMs, long bufferTimeMs, + long bucketTimeMs) { + LOGGER.info( + "Setting watermark for table: {} and mergeLevel: {} is {} (watermarkMs={}, bufferTimeMs={}, bucketTimeMs={})", + tableNameWithType, mergeLevel, getMergeRollupTaskDelayInNumTimeBuckets(watermarkMs, bufferTimeMs, bucketTimeMs), + watermarkMs, bucketTimeMs, bucketTimeMs); + + ControllerMetrics controllerMetrics = _clusterInfoAccessor.getControllerMetrics(); + if (controllerMetrics == null) { + return; + } + + PinotMetricsRegistry metricsRegistry = controllerMetrics.getMetricsRegistry(); + if (metricsRegistry == null) { + return; + } + + // Update gauge value that indicates the delay in terms of the number of time buckets. 
+ _mergeRollupWatermarks.computeIfAbsent(tableNameWithType, k -> new HashMap<>()); + Map<String, Long> watermarkForTable = _mergeRollupWatermarks.get(tableNameWithType); + + watermarkForTable.compute(mergeLevel, (k, v) -> { + if (v == null) { + controllerMetrics.addCallbackGaugeIfNeeded(getMetricNameForTaskDelay(tableNameWithType, mergeLevel), + (() -> getMergeRollupTaskDelayInNumTimeBuckets(watermarkForTable.getOrDefault(k, -1L), bufferTimeMs, + bucketTimeMs))); + } + return watermarkMs; + }); + } + + /** + * Reset the watermark for the given table name + * @param tableNameWithType a table name with type + */ + private void resetWatermarkMs(String tableNameWithType) { + ControllerMetrics controllerMetrics = _clusterInfoAccessor.getControllerMetrics(); + if (controllerMetrics == null) { + return; + } + PinotMetricsRegistry metricsRegistry = controllerMetrics.getMetricsRegistry(); + if (metricsRegistry == null) { + return; + } + + // Delete all the watermarks associated with the given table name + Map<String, Long> watermarksForTable = _mergeRollupWatermarks.get(tableNameWithType); Review comment: > (nit) Directly call _mergeRollupWatermarks.remove(tableNameWithType); The current logic is correct IMO, as we also need to remove the CallbackGauge from controllerMetrics. Directly calling `_mergeRollupWatermarks.remove(tableNameWithType);` only removes the watermarksForTable map from `_mergeRollupWatermarks`, but the CallbackGauge is still there. 
########## File path: pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/mergerollup/MergeRollupTaskGenerator.java ########## @@ -463,4 +490,71 @@ private long getWatermarkMs(long minStartTimeMs, long bucketMs, String mergeLeve return pinotTaskConfigs; } + + private long getMergeRollupTaskDelayInNumTimeBuckets(long watermarkMs, long bufferTimeMs, long bucketTimeMs) { + if (bufferTimeMs == 0 || watermarkMs == -1) { + return 0; + } + return (long) Math.floor((System.currentTimeMillis() - watermarkMs - bufferTimeMs) / (double) bucketTimeMs); + } + + private void setWatermarkMs(String tableNameWithType, String mergeLevel, long watermarkMs, long bufferTimeMs, + long bucketTimeMs) { + LOGGER.info( + "Setting watermark for table: {} and mergeLevel: {} is {} (watermarkMs={}, bufferTimeMs={}, bucketTimeMs={})", + tableNameWithType, mergeLevel, getMergeRollupTaskDelayInNumTimeBuckets(watermarkMs, bufferTimeMs, bucketTimeMs), + watermarkMs, bucketTimeMs, bucketTimeMs); + + ControllerMetrics controllerMetrics = _clusterInfoAccessor.getControllerMetrics(); + if (controllerMetrics == null) { + return; + } + + PinotMetricsRegistry metricsRegistry = controllerMetrics.getMetricsRegistry(); + if (metricsRegistry == null) { + return; + } + + // Update gauge value that indicates the delay in terms of the number of time buckets. 
+ _mergeRollupWatermarks.computeIfAbsent(tableNameWithType, k -> new HashMap<>()); + Map<String, Long> watermarkForTable = _mergeRollupWatermarks.get(tableNameWithType); + + watermarkForTable.compute(mergeLevel, (k, v) -> { + if (v == null) { + controllerMetrics.addCallbackGaugeIfNeeded(getMetricNameForTaskDelay(tableNameWithType, mergeLevel), + (() -> getMergeRollupTaskDelayInNumTimeBuckets(watermarkForTable.getOrDefault(k, -1L), bufferTimeMs, + bucketTimeMs))); + } + return watermarkMs; + }); + } + + /** + * Reset the watermark for the given table name + * @param tableNameWithType a table name with type + */ + private void resetWatermarkMs(String tableNameWithType) { + ControllerMetrics controllerMetrics = _clusterInfoAccessor.getControllerMetrics(); + if (controllerMetrics == null) { + return; + } + PinotMetricsRegistry metricsRegistry = controllerMetrics.getMetricsRegistry(); + if (metricsRegistry == null) { + return; + } + + // Delete all the watermarks associated with the given table name + Map<String, Long> watermarksForTable = _mergeRollupWatermarks.get(tableNameWithType); Review comment: > (nit) Directly call _mergeRollupWatermarks.remove(tableNameWithType); The current logic is correct IIUC, as we also need to remove the CallbackGauge from controllerMetrics. Directly calling `_mergeRollupWatermarks.remove(tableNameWithType);` only removes the watermarksForTable map from `_mergeRollupWatermarks`, but the CallbackGauge is still there. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org