jackjlli commented on a change in pull request #7122: URL: https://github.com/apache/incubator-pinot/pull/7122#discussion_r663239718
########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/ControllerConf.java ########## @@ -99,7 +99,10 @@ "controller.minion.instances.cleanup.task.frequencyInSeconds"; public static final String MINION_INSTANCES_CLEANUP_TASK_INITIAL_DELAY_SECONDS = "controller.minion.instances.cleanup.task.initialDelaySeconds"; - public static final String TASK_METRICS_EMITTER_FREQUENCY_IN_SECONDS = "controller.minion.task.metrics.emitter.frequencyInSeconds"; + public static final String MINION_INSTANCES_CLEANUP_TASK_MAX_OFFLINE_TIME_RANGE_SECONDS = Review comment: it should be max time as the threshold right? If the instance hasn't been back for more than this threshold, znodes should be dropped. ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/MinionInstancesCleanupTask.java ########## @@ -23,36 +23,61 @@ import org.apache.pinot.common.metrics.ControllerGauge; import org.apache.pinot.common.metrics.ControllerMetrics; import org.apache.pinot.controller.ControllerConf; +import org.apache.pinot.controller.LeadControllerManager; import org.apache.pinot.controller.helix.core.PinotHelixResourceManager; import org.apache.pinot.controller.helix.core.PinotResourceManagerResponse; import org.apache.pinot.core.periodictask.BasePeriodicTask; import org.apache.pinot.spi.utils.CommonConstants; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A periodic task to clean up offline Minion instances to not spam Helix. */ public class MinionInstancesCleanupTask extends BasePeriodicTask { + private static final Logger LOGGER = LoggerFactory.getLogger(MinionInstancesCleanupTask.class); + private final static String TASK_NAME = "MinionInstancesCleanupTask"; protected final PinotHelixResourceManager _pinotHelixResourceManager; + protected final LeadControllerManager _leadControllerManager; protected final ControllerMetrics _controllerMetrics; + private final long _minionInstanceCleanupTaskMaxOfflineTimeRangeInMilliseconds; - public MinionInstancesCleanupTask(PinotHelixResourceManager pinotHelixResourceManager, ControllerConf controllerConf, - ControllerMetrics controllerMetrics) { - super("MinionInstancesCleanupTask", controllerConf.getMinionInstancesCleanupTaskFrequencyInSeconds(), + public MinionInstancesCleanupTask(PinotHelixResourceManager pinotHelixResourceManager, + LeadControllerManager leadControllerManager, ControllerConf controllerConf, ControllerMetrics controllerMetrics) { + super(TASK_NAME, controllerConf.getMinionInstancesCleanupTaskFrequencyInSeconds(), controllerConf.getMinionInstancesCleanupTaskInitialDelaySeconds()); _pinotHelixResourceManager = pinotHelixResourceManager; + _leadControllerManager = leadControllerManager; _controllerMetrics = controllerMetrics; + _minionInstanceCleanupTaskMaxOfflineTimeRangeInMilliseconds = + controllerConf.getMinionInstancesCleanupTaskMaxOfflineTimeRangeInSeconds() * 1000L; } @Override protected void runTask() { + // Make it so that only one controller returns the metric for all the tasks. + if (!_leadControllerManager.isLeaderForTable(TASK_NAME)) { + return; + } + List<String> offlineInstances = new ArrayList<>(_pinotHelixResourceManager.getAllInstances()); offlineInstances.removeAll(_pinotHelixResourceManager.getOnlineInstanceList()); for (String offlineInstance : offlineInstances) { + // Since ZNodes under "/LIVEINSTANCES" are ephemeral, if there is a ZK session expire (e.g. due to network issue), + // the ZNode under "/LIVEINSTANCES" will be deleted. Thus, such race condition can happen when this task is running. + // In order to double confirm the live status of an instance, the field "LAST_OFFLINE_TIME" in ZNode under + // "/INSTANCES/<instance_id>/HISTORY" needs to be checked. If the value is "-1", that means the instance is ONLINE; + // if the value is a timestamp, that means the instance starts to be OFFLINE since that time. if (offlineInstance.startsWith(CommonConstants.Helix.PREFIX_OF_MINION_INSTANCE)) { - PinotResourceManagerResponse response = _pinotHelixResourceManager.dropInstance(offlineInstance); - if (response.isSuccessful()) { - _controllerMetrics.addValueToGlobalGauge(ControllerGauge.DROPPED_MINION_INSTANCES, 1); + // Drop the minion instance if it has been offline for more than a period of this task. + if (_pinotHelixResourceManager + .isInstanceOffline(offlineInstance, _minionInstanceCleanupTaskMaxOfflineTimeRangeInMilliseconds)) { + LOGGER.info("Dropping minion instance: {}", offlineInstance); + PinotResourceManagerResponse response = _pinotHelixResourceManager.dropInstance(offlineInstance); + if (response.isSuccessful()) { + _controllerMetrics.addValueToGlobalGauge(ControllerGauge.DROPPED_MINION_INSTANCES, 1); Review comment: Sure, we can do that in a later PR. ########## File path: pinot-controller/src/main/java/org/apache/pinot/controller/ControllerConf.java ########## @@ -146,6 +149,7 @@ private static long getRandomInitialDelayInSeconds() { private static final int DEFAULT_STATUS_CONTROLLER_WAIT_FOR_PUSH_TIME_IN_SECONDS = 10 * 60; // 10 minutes private static final int DEFAULT_TASK_MANAGER_FREQUENCY_IN_SECONDS = -1; // Disabled private static final int DEFAULT_MINION_INSTANCES_CLEANUP_TASK_FREQUENCY_IN_SECONDS = 60 * 60; // 1 Hour. + private static final int DEFAULT_MINION_INSTANCES_CLEANUP_TASK_MAX_OFFLINE_TIME_RANGE_IN_SECONDS = 10 * 60; // 10 minutes Review comment: Think of it a bit, 0 would be too short, since this race condition could happen even for instances from k8s or docker. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org