Greg Padgett has uploaded a new change for review.

Change subject: agent, broker: progressive score penalty for high cpu load
......................................................................
agent, broker: progressive score penalty for high cpu load

Penalize the host score progressively based on cpu load and duration of
that load.  The host load used doesn't count the load incurred by the
engine vm, in order to help keep host scores stable if the vm changes
hosts.

Additional related changes include not considering memory used by the
vm in the memory score, as well as logging changes to make it easier to
tell what the various factors were in generating the current score.

Change-Id: I9cb205f5eb3729a17692b8ab854c58ddfe862b2f
Bug-Url: https://bugzilla.redhat.com/1034205
Signed-off-by: Greg Padgett <gpadg...@redhat.com>
---
M ovirt_hosted_engine_ha/agent/constants.py.in
M ovirt_hosted_engine_ha/agent/hosted_engine.py
M ovirt_hosted_engine_ha/broker/submonitors/Makefile.am
M ovirt_hosted_engine_ha/broker/submonitors/cpu_load.py
A ovirt_hosted_engine_ha/broker/submonitors/cpu_load_no_engine.py
M ovirt_hosted_engine_ha/env/config.py
6 files changed, 264 insertions(+), 32 deletions(-)

git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/45/21645/1

diff --git a/ovirt_hosted_engine_ha/agent/constants.py.in b/ovirt_hosted_engine_ha/agent/constants.py.in
index fc689e6..b618db1 100644
--- a/ovirt_hosted_engine_ha/agent/constants.py.in
+++ b/ovirt_hosted_engine_ha/agent/constants.py.in
@@ -44,6 +44,12 @@
 MAX_DOMAIN_MONITOR_WAIT_SECS = 240
 METADATA_LOG_PERIOD_SECS = 600
 
+BASE_SCORE = 2400
+GATEWAY_SCORE_PENALTY = 1600
+MGMT_BRIDGE_SCORE_PENALTY = 600
+FREE_VM_MEMORY_SCORE_PENALTY = 400
+CPU_LOAD_SCORE_PENALTY = 1000
+
 VDSM_USER = '@VDSM_USER@'
 VDSM_GROUP = '@VDSM_GROUP@'
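
For reference, these constants define a deduction model: a host starts at
BASE_SCORE and loses points per detected problem, and since the maximum
possible deductions (1600 + 600 + 400 + 1000 = 3600) exceed the 2400 base,
hosted_engine.py clamps the final score at zero. A minimal standalone sketch
of that arithmetic (not part of the patch; names mirror the constants above):

    # Sketch of the deduction-based scoring introduced by this change.
    BASE_SCORE = 2400
    PENALTIES = {
        'gateway': 1600,        # GATEWAY_SCORE_PENALTY
        'mgmt-bridge': 600,     # MGMT_BRIDGE_SCORE_PENALTY
        'free-vm-memory': 400,  # FREE_VM_MEMORY_SCORE_PENALTY
        'cpu-load': 1000,       # CPU_LOAD_SCORE_PENALTY (maximum)
    }

    def host_score(active_penalties):
        # Subtract each active penalty from the base, clamping at 0.
        return max(0, BASE_SCORE - sum(PENALTIES[p] for p in active_penalties))

    assert host_score([]) == 2400
    assert host_score(['mgmt-bridge', 'cpu-load']) == 800
    assert host_score(PENALTIES) == 0  # all penalties: 3600 > 2400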
diff --git a/ovirt_hosted_engine_ha/agent/hosted_engine.py b/ovirt_hosted_engine_ha/agent/hosted_engine.py
index 31f21d0..39a87d3 100644
--- a/ovirt_hosted_engine_ha/agent/hosted_engine.py
+++ b/ovirt_hosted_engine_ha/agent/hosted_engine.py
@@ -17,6 +17,7 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 #
 
+import copy
 import errno
 import logging
 import os
@@ -60,6 +61,7 @@
     LF_GLOBAL_MD_ERROR_INT = 900
     LF_MAINTENANCE = 'LF_MAINTENANCE'
     LF_MAINTENANCE_INT = 900
+    LF_PENALTY_INT = 60
 
     MIGRATION_THRESHOLD_SCORE = 800
 
@@ -169,17 +171,11 @@
         })
         req.append({
             'field': 'cpu-load',
-            'monitor': 'cpu-load',
+            'monitor': 'cpu-load-no-engine',
             'options': {
                 'address': '0',
-                'use_ssl': self._config.get(config.ENGINE, config.VDSM_SSL)}
-        })
-        req.append({
-            'field': 'mem-load',
-            'monitor': 'mem-load',
-            'options': {
-                'address': '0',
-                'use_ssl': self._config.get(config.ENGINE, config.VDSM_SSL)}
+                'use_ssl': self._config.get(config.ENGINE, config.VDSM_SSL),
+                'vm_uuid': self._config.get(config.VM, config.VM_UUID)}
         })
         req.append({
             'field': 'engine-health',
@@ -209,6 +205,10 @@
 
         # Local timestamp of last metadata logging
         self._rinfo['last-metadata-log-time'] = 0
+
+        # CPU load tracking info
+        self._rinfo['last-load-update-time'] = 0
+        self._rinfo['cpu-load-history'] = []
 
         # Host id of local host
         self._rinfo['host-id'] = int(self._config.get(config.ENGINE,
@@ -568,38 +568,85 @@
                 return default
 
         lm = self._local_monitors
+        ts = int(time.time())
 
-        score = 0
+        score = constants.BASE_SCORE
 
         # FIXME score needed for vdsm storage pool connection?
         # (depending on storage integration, may not be able to report...)
-        score += 1000 * (1 if lm['gateway']['status'] == 'True' else 0)
-        score += 800 * (1 if lm['bridge']['status'] == 'True' else 0)
-        score += 400 * (1 if float_or_default(lm['mem-free']['status'], 0)
-                        >= 4096.0 else 0)
-        score += 100 * (1 if float_or_default(lm['cpu-load']['status'], 1)
-                        < 0.8 else 0)
-        score += 100 * (1 if float_or_default(lm['mem-load']['status'], 1)
-                        < 0.8 else 0)
+
+        if lm['gateway']['status'] == 'False':
+            self._log.info("Penalizing score by %d due to gateway status",
+                           constants.GATEWAY_SCORE_PENALTY,
+                           extra=log_filter.lf_args('score-gateway',
+                                                    self.LF_PENALTY_INT))
+            score -= constants.GATEWAY_SCORE_PENALTY
+
+        if lm['bridge']['status'] == 'False':
+            self._log.info("Penalizing score by %d due to mgmt bridge status",
+                           constants.MGMT_BRIDGE_SCORE_PENALTY,
+                           extra=log_filter.lf_args('score-mgmtbridge',
+                                                    self.LF_PENALTY_INT))
+            score -= constants.MGMT_BRIDGE_SCORE_PENALTY
+
+        # Record 15 minute cpu load history (not counting load caused by
+        # the engine vm).  The default load penalty is:
+        #   load 0-40%:  0
+        #   load 40-90%: 0 to 1000 (rising linearly with increasing load)
+        #   load 90%+:   1000
+        # Thus, a load of 80% causes an 800 point penalty
+        if util.has_elapsed(self._rinfo['last-load-update-time'], 60) \
+                and lm['cpu-load']['status'] != 'None':
+            if len(self._rinfo['cpu-load-history']) == 15:
+                self._rinfo['cpu-load-history'].pop(0)
+            self._rinfo['cpu-load-history'] \
+                .append(float(lm['cpu-load']['status']))
+            self._rinfo['last-load-update-time'] = ts
+        load_factor = sum(self._rinfo['cpu-load-history']) / 15
+        penalty = int((load_factor - 0.4) * 2.0
+                      * constants.CPU_LOAD_SCORE_PENALTY)
+        penalty = max(0, min(constants.CPU_LOAD_SCORE_PENALTY, penalty))
+        if penalty > 0:
+            self._log.info("Penalizing score by %d due to cpu load",
+                           penalty,
+                           extra=log_filter.lf_args('score-cpu',
+                                                    self.LF_PENALTY_INT))
+            score -= penalty
+
+        # Penalize for free mem only if the VM is not on this host
+        vm_mem = int(self._config.get(config.VM, config.MEM_SIZE))
+        if self._rinfo['current-state'] != self.States.ON \
+                and float_or_default(lm['mem-free']['status'], 0) < vm_mem:
+            self._log.info('Penalizing score by %d due to low free memory',
+                           constants.FREE_VM_MEMORY_SCORE_PENALTY,
+                           extra=log_filter.lf_args('score-memory',
+                                                    self.LF_PENALTY_INT))
+            score -= constants.FREE_VM_MEMORY_SCORE_PENALTY
 
         # If too many retries occur, give a less-suited host a chance
         if (self._rinfo['engine-vm-retry-count']
                 > constants.ENGINE_RETRY_COUNT):
-            self._log.info('Score is 0 due to {0} engine vm retry attempts',
-                           self._rinfo['engine-vm-retry-count'])
+            self._log.info('Score is 0 due to %d engine vm retry attempts',
+                           self._rinfo['engine-vm-retry-count'],
+                           extra=log_filter.lf_args('score-retries',
+                                                    self.LF_PENALTY_INT))
             score = 0
         elif self._rinfo['engine-vm-retry-count'] > 0:
             # Subtracting a small amount each time causes round-robin attempts
             # between hosts that are otherwise equally suited to run the engine
             penalty = 50 * self._rinfo['engine-vm-retry-count']
-            self._log.info('Penalizing score by {0}'
-                           ' due to {1} engine vm retry attempts',
-                           penalty, self._rinfo['engine-vm-retry-count'])
-            score = max(0, score - penalty)
+            self._log.info('Penalizing score by %d'
+                           ' due to %d engine vm retry attempts',
+                           penalty, self._rinfo['engine-vm-retry-count'],
+                           extra=log_filter.lf_args('score-retries',
+                                                    self.LF_PENALTY_INT))
+            score -= penalty
 
         # If engine has bad health status, let another host try
         if self._rinfo['bad-health-failure-time']:
-            self._log.info('Score is 0 due to bad engine health at {0}',
-                           time.ctime(self._rinfo['bad-health-failure-time']))
+            self._log.info('Score is 0 due to bad engine health at %s',
+                           time.ctime(self._rinfo['bad-health-failure-time']),
+                           extra=log_filter.lf_args('score-health',
+                                                    self.LF_PENALTY_INT))
             score = 0
 
         # If the VM shut down unexpectedly (user command, died, etc.), drop the
@@ -607,16 +654,21 @@
         # shortcut for the user to start host maintenance mode, though it still
         # should be set manually lest the score recover after a timeout.
         if self._rinfo['unexpected-shutdown-time']:
-            self._log.info('Score is 0 due to unexpected vm shutdown at {0}',
-                           time.ctime(self._rinfo['unexpected-shutdown-time']))
+            self._log.info('Score is 0 due to unexpected vm shutdown at %s',
+                           time.ctime(self._rinfo['unexpected-shutdown-time']),
+                           extra=log_filter.lf_args('score-shutdown',
+                                                    self.LF_PENALTY_INT))
             score = 0
 
         # Hosts in local maintenance mode should not run the vm
         if self._get_maintenance_mode() == self.MaintenanceMode.LOCAL:
-            self._log.info('Score is 0 due to local maintenance mode')
+            self._log.info('Score is 0 due to local maintenance mode',
+                           extra=log_filter.lf_args('score-maintenance',
+                                                    self.LF_PENALTY_INT))
            score = 0
 
-        ts = int(time.time())
+        score = max(0, score)
+
         data = ("{md_parse_vers}|{md_feature_vers}|{ts_int}"
                 "|{host_id}|{score}|{engine_status}|{name}"
                 .format(md_parse_vers=constants.METADATA_PARSE_VERSION,
@@ -702,10 +754,11 @@
                     'engine-status': None,
                     'hostname': '(unknown)'}
 
+            self._prior_host_stats[md['host-id']] = \
+                copy.copy(self._all_host_stats[md['host-id']])
+
             if self._all_host_stats[md['host-id']]['last-update-host-ts'] \
                     != md['host-ts']:
-                self._prior_host_stats[md['host-id']] = \
-                    self._all_host_stats[md['host-id']]
                 # Track first update in order to accurately judge liveness.
                 # If last-update-host-ts is 0, then first-update stays True
                 # which indicates that we cannot use this last-update-local-ts
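
In the score calculation above, the cpu penalty is derived from the mean of up
to 15 one-minute load samples and rises linearly between the 40% and 90% load
marks. A standalone sketch of that mapping with worked values (the function
name is illustrative; the constants and formula are from this patch):

    CPU_LOAD_SCORE_PENALTY = 1000

    def cpu_load_penalty(load_factor):
        # 0 below 40% load; linear from 0 to 1000 between 40% and 90%;
        # capped at 1000 above 90%.
        penalty = int((load_factor - 0.4) * 2.0 * CPU_LOAD_SCORE_PENALTY)
        return max(0, min(CPU_LOAD_SCORE_PENALTY, penalty))

    assert cpu_load_penalty(0.30) == 0
    assert cpu_load_penalty(0.80) == 800   # the 80% example from the comment
    assert cpu_load_penalty(0.95) == 1000

Note also that while the history holds fewer than 15 samples the sum is still
divided by 15, so the penalty ramps up gradually after agent start instead of
reacting to a single high reading.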
diff --git a/ovirt_hosted_engine_ha/broker/submonitors/Makefile.am b/ovirt_hosted_engine_ha/broker/submonitors/Makefile.am
index ed58d94..edc8fb7 100644
--- a/ovirt_hosted_engine_ha/broker/submonitors/Makefile.am
+++ b/ovirt_hosted_engine_ha/broker/submonitors/Makefile.am
@@ -29,6 +29,7 @@
 dist_brokersubmonitors_PYTHON = \
 	cpu_load.py \
+	cpu_load_no_engine.py \
 	engine_health.py \
 	mem_free.py \
 	mem_load.py \
diff --git a/ovirt_hosted_engine_ha/broker/submonitors/cpu_load.py b/ovirt_hosted_engine_ha/broker/submonitors/cpu_load.py
index 359949a..f037896 100644
--- a/ovirt_hosted_engine_ha/broker/submonitors/cpu_load.py
+++ b/ovirt_hosted_engine_ha/broker/submonitors/cpu_load.py
@@ -28,6 +28,10 @@
 
 class Submonitor(submonitor_base.SubmonitorBase):
     def action(self, options):
+        """
+        Return the one-minute load average, normalized as a ratio of load
+        to number of CPUs.
+        """
         load_per_thread = 1.0
 
         p = subprocess.Popen(['cat', '/proc/cpuinfo'], stdout=subprocess.PIPE)
         out = p.communicate()[0]
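
The docstring added to cpu_load.py describes its metric: the one-minute load
average divided by the number of CPUs, so 1.0 means every processor is fully
occupied. An equivalent standalone sketch (using os.getloadavg() rather than
the submonitor's /proc/cpuinfo parsing):

    import multiprocessing
    import os

    def normalized_cpu_load():
        # One-minute load average as a fraction of available CPUs;
        # e.g. a load average of 2.0 on a 4-CPU host yields 0.5.
        return os.getloadavg()[0] / multiprocessing.cpu_count()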
diff --git a/ovirt_hosted_engine_ha/broker/submonitors/cpu_load_no_engine.py b/ovirt_hosted_engine_ha/broker/submonitors/cpu_load_no_engine.py
new file mode 100644
index 0000000..7f55dfa
--- /dev/null
+++ b/ovirt_hosted_engine_ha/broker/submonitors/cpu_load_no_engine.py
@@ -0,0 +1,167 @@
+#
+# ovirt-hosted-engine-ha -- ovirt hosted engine high availability
+# Copyright (C) 2013 Red Hat, Inc.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+import logging
+import time
+from collections import namedtuple
+
+from ovirt_hosted_engine_ha.broker import submonitor_base
+from ovirt_hosted_engine_ha.lib import exceptions as exceptions
+from ovirt_hosted_engine_ha.lib import log_filter
+from ovirt_hosted_engine_ha.lib import util as util
+from ovirt_hosted_engine_ha.lib import vds_client as vdsc
+
+Ticks = namedtuple('Ticks', 'total, busy')
+
+
+def register():
+    return "cpu-load-no-engine"
+
+
+class Submonitor(submonitor_base.SubmonitorBase):
+    def setup(self, options):
+        self._log = logging.getLogger("EngineHealth")
+        self._log.addFilter(log_filter.IntermittentFilter())
+
+        self._address = options.get('address')
+        self._use_ssl = util.to_bool(options.get('use_ssl'))
+        self._vm_uuid = options.get('vm_uuid')
+        if (self._address is None
+                or self._use_ssl is None
+                or self._vm_uuid is None):
+            raise Exception("cpu-load-no-engine requires"
+                            " address, use_ssl, and vm_uuid")
+        self._log.debug("address=%s, use_ssl=%r, vm_uuid=%s",
+                        self._address, self._use_ssl, self._vm_uuid)
+
+        self.engine_pid = None
+        self.engine_pid_start_time = None
+        self.proc_stat = None
+
+        self.system = {'prev': None, 'cur': None}
+        self.vm = {'prev': None, 'cur': None}
+        self.latest_report_ts = None
+        self.load = 0.0
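
The heart of the new submonitor (continued below) is jiffie arithmetic: the
first line of /proc/stat carries cumulative cpu tick counters (user, nice,
system, idle, ...), and fields 14-15 of /proc/<pid>/stat (utime, stime) carry
a process's busy ticks, so load over an interval is the busy-tick delta over
the total-tick delta, and subtracting the engine VM's delta yields the
"no engine" figure. A standalone sketch of that calculation (helper names are
illustrative, not part of the patch):

    def read_system_ticks():
        # (total, busy) from the aggregate 'cpu' line of /proc/stat;
        # busy counts the user, nice, and system fields.
        with open('/proc/stat') as f:
            fields = [int(x) for x in f.readline().split()[1:]]
        return sum(fields), sum(fields[:3])

    def read_process_busy_ticks(pid):
        # utime + stime: fields 14-15 (1-based) of /proc/<pid>/stat
        with open('/proc/{0}/stat'.format(pid)) as f:
            stat = f.readline().split()
        return int(stat[13]) + int(stat[14])

    def interval_load(prev, cur):
        # Fraction of cpu time spent busy between two (total, busy) samples
        return (cur[1] - prev[1]) / float(cur[0] - prev[0])

Comparing field 22 of /proc/<pid>/stat (the process start time) against a
cached value, as update_stat_file() does below, guards against the pid being
recycled by an unrelated process between samples.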
+
+    def action(self, options):
+        """
+        Return the one-minute load average, normalized as a ratio of load
+        to number of CPUs, and without the impact of load from the engine
+        VM.
+        """
+        if self.latest_report_ts is None:
+            # For first reading, take 10-second average
+            self.refresh_ticks()
+            time.sleep(10)
+        elif not util.has_elapsed(self.latest_report_ts, 60):
+            return self.load
+
+        self.refresh_ticks()
+        self.calculate_load()
+        self.update_result("{0:.4f}".format(self.load))
+        self.latest_report_ts = time.time()
+
+    def refresh_ticks(self):
+        self.update_stat_file()
+        self.vm['prev'] = self.vm['cur']
+        self.vm['cur'] = self.get_vm_busy_ticks()
+        self.system['prev'] = self.system['cur']
+        self.system['cur'] = self.get_system_ticks()
+        self._log.debug("Ticks: total={0}, busy={1}, vm={2}"
+                        .format(self.system['cur'].total,
+                                self.system['cur'].busy,
+                                self.vm['cur']))
+
+    def get_system_ticks(self):
+        with open('/proc/stat', 'r') as f:
+            cpu = f.readline()
+
+        fields = [int(x) for x in cpu.split()[1:]]
+        total = sum(fields)
+        busy = sum(fields[:3])
+        return Ticks(total, busy)
+
+    def get_vm_busy_ticks(self):
+        if self.proc_stat is None:
+            return None
+        return int(self.proc_stat[13]) + int(self.proc_stat[14])
+
+    def calculate_load(self):
+        dtotal = self.system['cur'].total - self.system['prev'].total
+        dbusy = self.system['cur'].busy - self.system['prev'].busy
+        load = dbusy / float(dtotal)
+
+        if self.vm['cur'] is not None and self.vm['prev'] is not None:
+            dvm = self.vm['cur'] - self.vm['prev']
+            # The total jiffie delta is a good-enough approximation
+            engine_load = dvm / float(dtotal)
+            engine_load = max(engine_load, 0.0)
+        else:
+            engine_load = 0.0
+
+        load_no_engine = load - engine_load
+        load_no_engine = max(load_no_engine, 0.0)
+
+        self._log.info("System load"
+                       " total={0:.4f}, engine={1:.4f}, non-engine={2:.4f}"
+                       .format(load, engine_load, load_no_engine))
+        self.load = load_no_engine
+
+    def update_stat_file(self):
+        if self.engine_pid:
+            # Try the known pid and verify it's the same process
+            fname = '/proc/{0}/stat'.format(self.engine_pid)
+            try:
+                with open(fname, 'r') as f:
+                    self.proc_stat = f.readline().split()
+            except Exception:
+                self.proc_stat = None
+            else:
+                if int(self.proc_stat[21]) == self.engine_pid_start_time:
+                    self._log.debug("VM on this host, pid %d", self.engine_pid,
+                                    extra=log_filter.lf_args('vm', 60))
+                else:
+                    # This isn't the engine qemu process...
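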
+                    self.proc_stat = None
+
+        if self.proc_stat is None:
+            # Look for the engine vm pid and try to get the stats
+            self.engine_pid = None
+            self.engine_pid_start_time = None
+            try:
+                stats = vdsc.run_vds_client_cmd(self._address, self._use_ssl,
+                                                'getVmStats', self._vm_uuid)
+                pid = int(stats['statsList'][0]['pid'])
+            except Exception as e:
+                if isinstance(e, exceptions.DetailedError) \
+                        and e.detail == "Virtual machine does not exist":
+                    self._log.info("VM not on this host",
+                                   extra=log_filter.lf_args('vm', 60))
+                else:
+                    self._log.error("Failed to getVmStats: %s", str(e),
+                                    extra=log_filter.lf_args('vm', 60))
+            else:
+                fname = '/proc/{0}/stat'.format(pid)
+                try:
+                    with open(fname, 'r') as f:
+                        self.proc_stat = f.readline().split()
+                    self.engine_pid_start_time = int(self.proc_stat[21])
+                    self.engine_pid = pid
+                except Exception as e:
+                    # Try again next time
+                    self._log.error("Failed to read vm stats: %s", str(e),
+                                    extra=log_filter.lf_args('vm', 60))
diff --git a/ovirt_hosted_engine_ha/env/config.py b/ovirt_hosted_engine_ha/env/config.py
index e17534f..ef7c708 100644
--- a/ovirt_hosted_engine_ha/env/config.py
+++ b/ovirt_hosted_engine_ha/env/config.py
@@ -34,6 +34,7 @@
 # constants for vm.conf options
 VM = 'vm'
 VM_UUID = 'vmId'
+MEM_SIZE = 'memSize'
 
 # constants for ha.conf options
 HA = 'ha'

--
To view, visit http://gerrit.ovirt.org/21645
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I9cb205f5eb3729a17692b8ab854c58ddfe862b2f
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-hosted-engine-ha
Gerrit-Branch: master
Gerrit-Owner: Greg Padgett <gpadg...@redhat.com>

_______________________________________________
Engine-patches mailing list
Engine-patches@ovirt.org
http://lists.ovirt.org/mailman/listinfo/engine-patches