Arik Hadas has uploaded a new change for review. Change subject: core: prevent possible deadlock between monitoring threads ......................................................................
core: prevent possible deadlock between monitoring threads If VM went down during migration in the source host, the monitoring (VURTI) thread of the source host tries to destroy the VM in the destination host. In order to destroy the VM in the destination host, it tries to lock the destination host. When we have two monitoring threads that try to do it, we end-up in a deadlock because each of them locks the host it monitors and tries to lock the other one. This patch solves this problem by postponing the destroy operation on the destination host to take place after the lock on the monitored host is released. It is safe to do it that way because the status of the VM in the destination host must be either Down or MigratingTo. If the status is MigratingTo, the monitoring of the destination host just ignores the VM. If the status is Down, the monitoring of the destination host will just destroy the VM and the second destroy operation will fail. Change-Id: Ia4c52e0f8109f9599965078d2c6dd4db705cec97 Bug-Url: https://bugzilla.redhat.com/1142776 Signed-off-by: Arik Hadas <aha...@redhat.com> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java M backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java 3 files changed, 35 insertions(+), 15 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/99/35099/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java index c54dd91..925129e 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java @@ -46,6 +46,7 @@ import org.ovirt.engine.core.common.businessentities.StoragePool; import org.ovirt.engine.core.common.businessentities.StoragePoolStatus; import org.ovirt.engine.core.common.businessentities.VDS; +import org.ovirt.engine.core.common.businessentities.VM; import org.ovirt.engine.core.common.businessentities.VmDynamic; import org.ovirt.engine.core.common.businessentities.VmStatic; import org.ovirt.engine.core.common.businessentities.gluster.GlusterBrickEntity; @@ -59,6 +60,7 @@ import org.ovirt.engine.core.common.eventqueue.EventType; import org.ovirt.engine.core.common.locks.LockingGroup; import org.ovirt.engine.core.common.utils.Pair; +import org.ovirt.engine.core.common.vdscommands.DestroyVmVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.DisconnectStoragePoolVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.SetVmTicketVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.StartSpiceVDSCommandParameters; @@ -80,6 +82,7 @@ import org.ovirt.engine.core.utils.log.Log; import org.ovirt.engine.core.utils.log.LogFactory; import org.ovirt.engine.core.utils.threadpool.ThreadPoolUtil; +import org.ovirt.engine.core.vdsbroker.DestroyVmVDSCommand; import org.ovirt.engine.core.vdsbroker.MonitoringStrategyFactory; import org.ovirt.engine.core.vdsbroker.ResourceManager; import org.ovirt.engine.core.vdsbroker.irsbroker.IrsBrokerCommand; @@ -519,6 +522,31 @@ }); } + @Override + public void destroyVms(final List<Pair<VM, Guid>> vmsToDestroy) { + if (vmsToDestroy.isEmpty()) { + return; + } + + ThreadPoolUtil.execute(new Runnable() { + @Override + public void run() { + for (Pair<VM, Guid> vmToDestroy : vmsToDestroy) { + DestroyVmVDSCommand<DestroyVmVDSCommandParameters> destroyCmd = + new DestroyVmVDSCommand<> + (new DestroyVmVDSCommandParameters(vmToDestroy.getSecond(), vmToDestroy.getFirst().getId(), true, false, 0)); + destroyCmd.execute(); + if (destroyCmd.getVDSReturnValue().getSucceeded()) { + log.infoFormat("Stopped migrating vm: {0} on vds: {1}", vmToDestroy.getFirst().getName(), vmToDestroy.getSecond()); + } else { + log.infoFormat("Could not stop migrating vm: {0} on vds: {1}, Error: {2}", vmToDestroy.getFirst().getName(), + vmToDestroy.getSecond(), destroyCmd.getVDSReturnValue().getExceptionString()); + } + } + } + }); + } + private static final Log log = LogFactory.getLog(VdsEventListener.class); } diff --git a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java index ef1c795..2d8cff4 100644 --- a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java +++ b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java @@ -7,6 +7,7 @@ import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.common.errors.VdcBllErrors; import org.ovirt.engine.core.common.eventqueue.EventResult; +import org.ovirt.engine.core.common.utils.Pair; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.compat.TransactionScopeOption; @@ -77,4 +78,6 @@ * @param vdsId */ void updateSlaPolicies(List<Guid> vmIds, Guid vdsId); + + public void destroyVms(List<Pair<VM, Guid>> vmsToDestroy); } diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java index 2d152af..7a308b4 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java @@ -131,6 +131,8 @@ private boolean refreshedCapabilities = false; private static Map<Guid, Long> hostDownTimes = new HashMap<>(); private boolean vdsMaintenanceTimeoutOccurred; + /** Pairs of VM and the ID of the VDS it should be destroyed from */ + private final List<Pair<VM, Guid>> vmsToDestroy = new LinkedList<>(); private static final Log log = LogFactory.getLog(VdsUpdateRunTimeInfo.class); @@ -581,6 +583,7 @@ : "unknown"); } } + getVdsEventListener().destroyVms(vmsToDestroy); // rerun all vms from rerun list for (Guid vm_guid : _vmsToRerun) { log.errorFormat("Rerun vm {0}. Called from vds {1}", vm_guid, _vds.getName()); @@ -1672,21 +1675,7 @@ // exit status that's OK, otherwise.. if (vmDynamic != null && vmDynamic.getExitStatus() != VmExitStatus.Normal) { if (curVm.getMigratingToVds() != null) { - DestroyVmVDSCommand<DestroyVmVDSCommandParameters> destroyCmd = - new DestroyVmVDSCommand<DestroyVmVDSCommandParameters> - (new DestroyVmVDSCommandParameters(new Guid(curVm.getMigratingToVds().toString()), - curVm.getId(), - true, - false, - 0)); - destroyCmd.execute(); - if (destroyCmd.getVDSReturnValue().getSucceeded()) { - log.infoFormat("Stopped migrating vm: {0} on vds: {1}", curVm.getName(), - curVm.getMigratingToVds()); - } else { - log.infoFormat("Could not stop migrating vm: {0} on vds: {1}, Error: {2}", curVm.getName(), - curVm.getMigratingToVds(), destroyCmd.getVDSReturnValue().getExceptionString()); - } + vmsToDestroy.add(new Pair(curVm, curVm.getMigratingToVds())); } // set vm status to down if source vm crushed ResourceManager.getInstance().InternalSetVmStatus(curVm, -- To view, visit http://gerrit.ovirt.org/35099 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ia4c52e0f8109f9599965078d2c6dd4db705cec97 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: ovirt-engine-3.5 Gerrit-Owner: Arik Hadas <aha...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches