Greg Padgett has uploaded a new change for review.

Change subject: agent: clean vdsm state if vm dies unexpectedly
......................................................................
agent: clean vdsm state if vm dies unexpectedly If the hosted engine vm dies unexpectedly, vdsm will hold onto some queryable state for the vm. An attempt to start the vm will be met with an error, "Virtual machine already exists". To prevent this, check for VM state and attempt to clean it before a startup attempt by using the vdsm 'destroy' verb. Change-Id: I52c8f41c28fbc1942a8b392b275359df57c1b5ef Bug-Url: https://bugzilla.redhat.com/1013638 Signed-off-by: Greg Padgett <gpadg...@redhat.com> --- M ovirt_hosted_engine_ha/agent/hosted_engine.py 1 file changed, 48 insertions(+), 0 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-hosted-engine-ha refs/changes/74/19874/1 diff --git a/ovirt_hosted_engine_ha/agent/hosted_engine.py b/ovirt_hosted_engine_ha/agent/hosted_engine.py index 89304ba..8d65180 100644 --- a/ovirt_hosted_engine_ha/agent/hosted_engine.py +++ b/ovirt_hosted_engine_ha/agent/hosted_engine.py @@ -32,6 +32,7 @@ from ..lib import exceptions as ex from ..lib import log_filter from ..lib import metadata +from ..lib import util from ..lib import vds_client as vdsc @@ -746,6 +747,9 @@ return self.States.ON, True def _start_engine_vm(self): + # Ensure there isn't any stale VDSM state from a prior VM lifecycle + self._clean_vdsm_state() + self._log.info("Starting vm using `%s --vm-start`", constants.HOSTED_ENGINE_BINARY) p = subprocess.Popen([constants.HOSTED_ENGINE_BINARY, @@ -769,6 +773,50 @@ self._log.error("Engine VM started on localhost") return + def _clean_vdsm_state(self): + """ + Query VDSM for stats on hosted engine VM, and if there are stats for + the VM but the VM is not running, attempt to clear them using the + VDSM 'destroy' verb. If after 10 tries the state is present, raise + an exception indicating the error. 
+ """ + self._log.info("Ensuring VDSM state is clear for engine VM") + vm_id = self._config.get(config.VM, config.VM_UUID) + use_ssl = util.to_bool(self._config.get(config.ENGINE, + config.VDSM_SSL)) + + for i in range(0, 10): + # Loop until state is clear or until timeout + try: + stats = vdsc.run_vds_client_cmd('0', use_ssl, + 'getVmStats', vm_id) + except Exception as e: + if isinstance(e, ex.DetailedError) \ + and e.detail == "Virtual machine does not exist": + self._log.info("Vdsm state for VM clean") + return + else: + raise + + vm_status = stats['statsList'][0]['status'].lower() + if vm_status == 'powering up' or vm_status == 'up': + self._log.info("VM is running on host") + return + + self._log.info("Cleaning state for non-running VM") + try: + vdsc.run_vds_client_cmd('0', use_ssl, 'destroy', vm_id) + except Exception as e: + if isinstance(e, ex.DetailedError) \ + and e.detail == "Virtual machine does not exist": + self._log.info("Vdsm state for VM clean") + return + else: + raise + time.sleep(1) + + raise Exception("Timed out trying to clean VDSM state for VM") + @handler_cleanup def _handle_on(self): """ -- To view, visit http://gerrit.ovirt.org/19874 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I52c8f41c28fbc1942a8b392b275359df57c1b5ef Gerrit-PatchSet: 1 Gerrit-Project: ovirt-hosted-engine-ha Gerrit-Branch: master Gerrit-Owner: Greg Padgett <gpadg...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches