Jiří Moskovčák has uploaded a new change for review. Change subject: added EngineStarting state ......................................................................
added EngineStarting state When the VM with the engine is starting, it takes some time and goes through the states "waitforlaunch" and "powering up". During the startup the engine was marked as UnexpectedlyDown because it wasn't 'up', and the host got score 0, which might lead to killing the VM while it's starting and trying to start it on another host, where the same situation repeats. Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1093638 Change-Id: Ia7e5bb28eeb839b684be74ea768265d9dab01175 Signed-off-by: Jiri Moskovcak <jmosk...@redhat.com> --- M ovirt_hosted_engine_ha/agent/constants.py.in M ovirt_hosted_engine_ha/agent/states.py M ovirt_hosted_engine_ha/broker/submonitors/engine_health.py M ovirt_hosted_engine_ha/lib/Makefile.am A ovirt_hosted_engine_ha/lib/engine.py 5 files changed, 83 insertions(+), 7 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/25/30025/1 diff --git a/ovirt_hosted_engine_ha/agent/constants.py.in b/ovirt_hosted_engine_ha/agent/constants.py.in index 3c28849..56a0851 100644 --- a/ovirt_hosted_engine_ha/agent/constants.py.in +++ b/ovirt_hosted_engine_ha/agent/constants.py.in @@ -54,6 +54,7 @@ MAX_VDSM_START_RETRIES = 5 MAX_DOMAIN_MONITOR_WAIT_SECS = 240 METADATA_LOG_PERIOD_SECS = 600 +ENGINE_STARTING_TIMEOUT = 600 BASE_SCORE = 2400 GATEWAY_SCORE_PENALTY = 1600 diff --git a/ovirt_hosted_engine_ha/agent/states.py b/ovirt_hosted_engine_ha/agent/states.py index a75994d..effb782 100644 --- a/ovirt_hosted_engine_ha/agent/states.py +++ b/ovirt_hosted_engine_ha/agent/states.py @@ -2,6 +2,7 @@ from ..lib.fsm import BaseState, BaseFSM from ..lib import log_filter +from ..lib import engine from . 
import constants from .state_decorators import check_local_maintenance, check_timeout from .state_decorators import check_local_vm_unknown, check_global_maintenance @@ -655,7 +656,7 @@ :transition GlobalMaintenance: :transition UnknownLocalVmState: :transition LocalMaintenance: - :transition EngineUp: + :transition EngineStarting: :transition EngineDown: """ @check_global_maintenance(GlobalMaintenance) @@ -673,7 +674,7 @@ new_data = new_data._replace( engine_vm_retry_time=dtime(new_data), engine_vm_retry_count=0) - return EngineUp(new_data) + return EngineStarting(new_data) else: new_data = new_data._replace( engine_vm_retry_time=dtime(new_data), @@ -681,6 +682,39 @@ return EngineDown(new_data) +class EngineStarting(EngineState): + """ + This state is responsible for starting the VM on the local machine. + + :transition GlobalMaintenance: + :transition UnknownLocalVmState: + :transition LocalMaintenance: + :transition EngineUp: + :transition EngineDown: + """ + @check_global_maintenance(GlobalMaintenance) + @check_local_vm_unknown(UnknownLocalVmState) + @check_local_maintenance(LocalMaintenance) + @check_timeout(EngineStop, constants.ENGINE_STARTING_TIMEOUT, + BaseFSM.WAIT) + def consume(self, fsm, new_data, logger): + """ + :type fsm: BaseFSM + :type new_data: HostedEngineData + :type logger: logging.Logger + """ + + # engine is running + if new_data.best_engine_status["vm"] == engine.VMState.UP: + if new_data.best_engine_status["health"] == 'good': + return EngineUp(new_data) + else: + logger.info("VM is powering up..") + return EngineStarting(new_data) + + return EngineUnexpectedlyDown(new_data) + + class EngineMigratingAway(EngineState): """ This state is responsible for monitoring a migration of the engine diff --git a/ovirt_hosted_engine_ha/broker/submonitors/engine_health.py b/ovirt_hosted_engine_ha/broker/submonitors/engine_health.py index 7557a50..d41337c 100644 --- a/ovirt_hosted_engine_ha/broker/submonitors/engine_health.py +++ 
b/ovirt_hosted_engine_ha/broker/submonitors/engine_health.py @@ -27,6 +27,7 @@ from ovirt_hosted_engine_ha.lib import log_filter from ovirt_hosted_engine_ha.lib import util as util from ovirt_hosted_engine_ha.lib import vds_client as vdsc +from ovirt_hosted_engine_ha.lib import engine def register(): @@ -74,10 +75,15 @@ # Report states that are not really Up, but should be # reported as such - if vm_status in ('paused', 'waitforlaunch', 'restoringstate'): + if vm_status in ('paused', + 'waitforlaunch', + 'restoringstate', + 'powering up'): self._log.info("VM status: %s", vm_status, extra=log_filter.lf_args('status', 60)) - d = {'vm': 'up', 'health': 'bad', 'detail': vm_status, + d = {'vm': engine.VMState.UP, + 'health': engine.Health.BAD, + 'detail': vm_status, 'reason': 'bad vm status'} self.update_result(json.dumps(d)) return @@ -86,7 +92,9 @@ if vm_status in ('down', 'migration destination'): self._log.info("VM not running on this host, status %s", vm_status, extra=log_filter.lf_args('status', 60)) - d = {'vm': 'down', 'health': 'bad', 'detail': vm_status, + d = {'vm': engine.VMState.DOWN, + 'health': engine.Health.BAD, + 'detail': vm_status, 'reason': 'bad vm status'} self.update_result(json.dumps(d)) return @@ -100,11 +108,15 @@ if p.returncode == 0: self._log.info("VM is up on this host with healthy engine", extra=log_filter.lf_args('status', 60)) - d = {'vm': 'up', 'health': 'good', 'detail': vm_status} + d = {'vm': engine.VMState.UP, + 'health': engine.Health.GOOD, + 'detail': vm_status} self.update_result(json.dumps(d)) else: self._log.warning("bad health status: %s", output[0]) - d = {'vm': 'up', 'health': 'bad', 'detail': vm_status, + d = {'vm': engine.VMState.UP, + 'health': engine.Health.BAD, + 'detail': vm_status, 'reason': 'failed liveliness check'} self.update_result(json.dumps(d)) diff --git a/ovirt_hosted_engine_ha/lib/Makefile.am b/ovirt_hosted_engine_ha/lib/Makefile.am index 7015e31..427b28e 100644 --- a/ovirt_hosted_engine_ha/lib/Makefile.am 
+++ b/ovirt_hosted_engine_ha/lib/Makefile.am @@ -37,6 +37,7 @@ util.py \ vds_client.py \ monotonic.py \ + engine.py \ $(NULL) SUBDIRS = \ diff --git a/ovirt_hosted_engine_ha/lib/engine.py b/ovirt_hosted_engine_ha/lib/engine.py new file mode 100644 index 0000000..cba520c --- /dev/null +++ b/ovirt_hosted_engine_ha/lib/engine.py @@ -0,0 +1,28 @@ +# +# ovirt-hosted-engine-ha -- ovirt hosted engine high availability +# Copyright (C) 2013 Red Hat, Inc. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# + + +class VMState(object): + UP = "up" + DOWN = "down" + + +class Health(object): + GOOD = "good" + BAD = "bad" -- To view, visit http://gerrit.ovirt.org/30025 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ia7e5bb28eeb839b684be74ea768265d9dab01175 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-hosted-engine-ha Gerrit-Branch: master Gerrit-Owner: Jiří Moskovčák <jmosk...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches