Roy Golan has uploaded a new change for review. Change subject: core: throttle running of VMs (#843058) ......................................................................
core: throttle running of VMs (#843058) https://bugzilla.redhat.com/show_bug.cgi?id=843058 Bulk running of VMs reguallary fail short in running all VMs. The reason is The VdsSelector counts and reserve memory using a) host commited memory: # of VMs * each VM static mem b) pending VM count memory : # of VMs to run * thier memory count - a shared state variable pending VM count is increased by the end of RUnVm and decreased by VdsUpdateRunTimeInfo i.e two independent proccesses. As long as the VdsUpdateRunTimeInfo don't run, theres an overflow in the calculation of the free memory to run VM which causes a host to be falsely not selected to run the VM. The throtller here is using the shared-state between the VdsUpdateRunTimeInfo and the RunVmCommands, the pending VM count and is using a wait/notify semantics to let the VdsUpdateRunTimeInfo to interleave if we don't have enough memory run the VM. - Tests - setup: Host with 8Gb mem, 3.0 pool, cluster, 10Vms 2 Host with 4Gb mem, 3.1 pool, cluster 5VMs muliple run 10 VMs - prior to the fix the 9th and 10th can locate a host to run on after the fix all 10 VMs complete the run. In the 9th VM run the selector waits for the update to decrease memory and continue after less than a second. migrate VMs back and forth of 5Vms with no issues. Change-Id: I076ede6cba919bc61f7546d7b29ef436eb6d3375 Signed-off-by: Roy Golan <rgo...@redhat.com> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MigrateVmCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RunVmCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RunVmCommandBase.java A backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/Throttler.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java A backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsFreeMemoryChecker.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsSelector.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VmPoolCommandBase.java A backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VoidThrottler.java M backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/RunVmCommandTest.java M backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsAsyncCommand.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java 12 files changed, 146 insertions(+), 40 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/04/7204/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MigrateVmCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MigrateVmCommand.java index 106a5da..8f4a67c 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MigrateVmCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MigrateVmCommand.java @@ -37,7 +37,7 @@ public MigrateVmCommand(T parameters) { super(parameters); - setVdsSelector(new VdsSelector(getVm(), getVdsDestinationId(), true)); + setVdsSelector(new VdsSelector(getVm(), getVdsDestinationId(), true, new VdsFreeMemoryChecker(this))); forcedMigrationForNonMigratableVM = parameters.isForceMigrationForNonMigratableVM(); } diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RunVmCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RunVmCommand.java index f3708a0..afe75cd 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RunVmCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RunVmCommand.java @@ -119,7 +119,7 @@ if (getVm() != null) { Guid destVdsId = (getDestinationVds() != null) ? (Guid) getDestinationVds().getId() : null; - setVdsSelector(new VdsSelector(getVm(), destVdsId, true)); + setVdsSelector(new VdsSelector(getVm(), destVdsId, true, new VdsFreeMemoryChecker(this))); refreshBootParameters(runVmParameters); // set vm disks @@ -247,6 +247,7 @@ _isRerun = false; } } + @Override protected void ExecuteVmCommand() { @@ -879,4 +880,5 @@ @Override public void addQuotaPermissionSubject(List<PermissionSubject> quotaPermissionList) { } + } diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RunVmCommandBase.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RunVmCommandBase.java index 9d84370..74f07e7 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RunVmCommandBase.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RunVmCommandBase.java @@ -3,6 +3,10 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; import org.ovirt.engine.core.bll.job.ExecutionContext; import org.ovirt.engine.core.bll.job.ExecutionContext.ExecutionMethod; @@ -40,11 +44,12 @@ * Base class for asynchronous running process handling */ public abstract class RunVmCommandBase<T extends VmOperationParameterBase> extends VmCommand<T> implements - IVdsAsyncCommand { + IVdsAsyncCommand, Throttler { private static Log log = LogFactory.getLog(RunVmCommandBase.class); protected static final HashMap<Guid, Integer> _vds_pending_vm_count = new HashMap<Guid, Integer>(); - private final Object _decreaseLock = new Object(); + protected final Lock decreaseLock = new ReentrantLock(); + protected final Condition decreased = decreaseLock.newCondition(); private VdsSelector privateVdsSelector; protected boolean _isRerun = false; protected VDS _destinationVds; @@ -273,8 +278,6 @@ */ @Override public void RunningSucceded() { - DecreasePendingVms(getCurrentVdsId()); - setSucceeded(true); setActionReturnValue(VMStatus.Up); log(); @@ -360,7 +363,8 @@ } protected void DecreasePendingVms(Guid vdsId) { - synchronized (_decreaseLock) { + try { + decreaseLock.lock(); boolean updateDynamic = false; VDS vds = DbFacade.getInstance().getVdsDAO().get(vdsId); if (vds == null) @@ -406,7 +410,37 @@ vds.getvds_name(), vds.getpending_vmem_size(), getVm().getvm_name()); } } + decreased.signal(); + } finally { + decreaseLock.unlock(); } } + /** + * throttle bulk run of VMs by waiting for the update of run-time to kick in and fire <br> + * the DecreasePendingVms event. + * @see VdsEventListener + * @See VdsUpdateRunTimeInfo + */ + @Override + public void throttle() { + if (log.isDebugEnabled()) { + log.debug("try to wait for te engine update the host memory and cpu stats"); + } + + decreaseLock.lock(); + try { + // wait for the run-time refresh to decrease any current powering-up VMs + decreased.await(Config.<Integer> GetValue(ConfigValues.VdsRefreshRate), TimeUnit.SECONDS); + } catch (InterruptedException e) { + // ignore + } finally { + decreaseLock.unlock(); + } + } + + @Override + public void onPowerringUp() { + DecreasePendingVms(getCurrentVdsId()); + } } diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/Throttler.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/Throttler.java new file mode 100644 index 0000000..1f36f93 --- /dev/null +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/Throttler.java @@ -0,0 +1,14 @@ +package org.ovirt.engine.core.bll; + +/** + * Some commands e.g RunVm may run as a bulk AKA {@link MultipleActinosRunner} and performs logic to <br> + * count and reserve memory and CPU to assure there are both enough resources to complete them<br> + * (i.e run a VM on a selected host) and that we don't exceed those for the subsequent executions.<br> + * Moreover bulk operations may cause a pick in VDSM resources utilization and the engine can regulate <br> + * the pace to enable <br> + * Successful end of the operations. + */ +public interface Throttler { + + public void throttle(); +} diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java index 5bd929d..d81c503 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java @@ -240,25 +240,32 @@ @Override public void processOnVmPoweringUp(Guid vds_id, Guid vmid, String display_ip, int display_port) { IVdsAsyncCommand command = Backend.getInstance().getResourceManager().GetAsyncCommandForVm(vmid); - if (command != null && command.getAutoStart() && command.getAutoStartVdsId() != null) { - try { - String otp64 = Ticketing.GenerateOTP(); - Backend.getInstance() - .getResourceManager() - .RunVdsCommand(VDSCommandType.SetVmTicket, - new SetVmTicketVDSCommandParameters(vds_id, vmid, otp64, 60, "", Guid.Empty)); - log.infoFormat( - "VdsEventListener.ProcessOnVmPoweringUp - Auto start logic, starting spice to vm - {0} ", vmid); - Backend.getInstance() - .getResourceManager() - .RunVdsCommand( - VDSCommandType.StartSpice, - new StartSpiceVDSCommandParameters(command.getAutoStartVdsId(), display_ip, - display_port, otp64)); - } catch (RuntimeException ex) { - log.errorFormat( - "VdsEventListener.ProcessOnVmPoweringUp - failed to start spice on VM - {0} - {1} - {2}", vmid, - ex.getMessage(), ex.getStackTrace()); + + if (command != null) { + command.onPowerringUp(); + if (command.getAutoStart() && command.getAutoStartVdsId() != null) { + try { + String otp64 = Ticketing.GenerateOTP(); + Backend.getInstance() + .getResourceManager() + .RunVdsCommand(VDSCommandType.SetVmTicket, + new SetVmTicketVDSCommandParameters(vds_id, vmid, otp64, 60, "", Guid.Empty)); + log.infoFormat( + "VdsEventListener.ProcessOnVmPoweringUp - Auto start logic, starting spice to vm - {0} ", + vmid); + Backend.getInstance() + .getResourceManager() + .RunVdsCommand( + VDSCommandType.StartSpice, + new StartSpiceVDSCommandParameters(command.getAutoStartVdsId(), display_ip, + display_port, otp64)); + } catch (RuntimeException ex) { + log.errorFormat( + "VdsEventListener.ProcessOnVmPoweringUp - failed to start spice on VM - {0} - {1} - {2}", + vmid, + ex.getMessage(), + ex.getStackTrace()); + } } } } diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsFreeMemoryChecker.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsFreeMemoryChecker.java new file mode 100644 index 0000000..92d7938 --- /dev/null +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsFreeMemoryChecker.java @@ -0,0 +1,34 @@ +package org.ovirt.engine.core.bll; + +import org.apache.commons.logging.Log; +import org.ovirt.engine.core.common.businessentities.VDS; +import org.ovirt.engine.core.common.businessentities.VM; +import org.ovirt.engine.core.dal.dbbroker.DbFacade; +import org.ovirt.engine.core.utils.log.LogFactory; + +public class VdsFreeMemoryChecker { + + private Throttler throttler; + + private static Log log = LogFactory.getLog(VdsFreeMemoryChecker.class); + + public VdsFreeMemoryChecker(Throttler throttler) { + this.throttler = throttler; + } + + public boolean evaluate(VDS vds, VM vm) { + // first check if this host has memory. + if (!RunVmCommandBase.hasMemoryToRunVM(vds, vm)) { + // not enough memory to run the vm invoke the throttler. + log.debug("not enough memory on host. throttling..."); + throttler.throttle(); + + // fetch a fresh vds and check its memory again + vds = DbFacade.getInstance().getVdsDAO().get(vds.getId()); + + // check free memory on the updated host + return RunVmCommandBase.hasMemoryToRunVM(vds, vm); + } + return true; + } +} diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsSelector.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsSelector.java index 8c63f09..7646122 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsSelector.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsSelector.java @@ -53,6 +53,7 @@ } private VM privateVm; + private VdsFreeMemoryChecker memoryChecker; private VM getVm() { return privateVm; @@ -62,10 +63,11 @@ privateVm = value; } - public VdsSelector(VM vm, NGuid destinationVdsId, boolean dedicatedFirst) { + public VdsSelector(VM vm, NGuid destinationVdsId, boolean dedicatedFirst, VdsFreeMemoryChecker memoryChecker) { setVm(vm); setDestinationVdsId(destinationVdsId); setCheckDestinationFirst(dedicatedFirst); + this.memoryChecker = memoryChecker; } public Guid GetVdsToRunOn() { @@ -226,7 +228,7 @@ return new ValidationResult(VdcBllMessages.ACTION_TYPE_FAILED_VDS_VM_CLUSTER); } // If Vm in Paused mode - no additional memory allocation needed - else if (getVm().getstatus() != VMStatus.Paused && !RunVmCommandBase.hasMemoryToRunVM(vds, getVm())) { + else if (getVm().getstatus() != VMStatus.Paused && !memoryChecker.evaluate(vds, getVm())) { // not enough memory // In case we are using this function in migration we make sure we // don't allocate the same VDS @@ -297,7 +299,7 @@ continue; // apply limit on vds memory over commit. - if (!RunVmCommandBase.hasMemoryToRunVM(curVds, getVm())) + if (!memoryChecker.evaluate(curVds, getVm())) continue; // In case we are using this function in migration we make sure we diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VmPoolCommandBase.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VmPoolCommandBase.java index ff27ed5..0455b41 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VmPoolCommandBase.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VmPoolCommandBase.java @@ -245,7 +245,7 @@ RunVmParams runVmParams = tempVar; VdsSelector vdsSelector = new VdsSelector(vm, ((runVmParams.getDestinationVdsId()) != null) ? runVmParams.getDestinationVdsId() - : vm.getdedicated_vm_for_vds(), true); + : vm.getdedicated_vm_for_vds(), true, new VdsFreeMemoryChecker(new VoidThrottler())); return VmRunHandler.getInstance().canRunVm(vm, messages, runVmParams, diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VoidThrottler.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VoidThrottler.java new file mode 100644 index 0000000..3b4774f --- /dev/null +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VoidThrottler.java @@ -0,0 +1,12 @@ +package org.ovirt.engine.core.bll; + +/** + * Void throttler should be used in cases when no throttling is desired e.g in the validation phase of commands. + */ +public class VoidThrottler implements Throttler { + + @Override + public void throttle() { + } + +} diff --git a/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/RunVmCommandTest.java b/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/RunVmCommandTest.java index 8858d48..1c2e0a6 100644 --- a/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/RunVmCommandTest.java +++ b/backend/manager/modules/bll/src/test/java/org/ovirt/engine/core/bll/RunVmCommandTest.java @@ -337,7 +337,8 @@ final VM vm = new VM(); doReturn(vm).when(command).getVm(); - doReturn(new VdsSelector(vm, new Guid(), true)).when(command).getVdsSelector(); + doReturn(new VdsSelector(vm, new Guid(), true, new VdsFreeMemoryChecker(command))).when(command) + .getVdsSelector(); assertFalse(command.canRunVm()); assertTrue(command.getReturnValue().getCanDoActionMessages().contains("VM_CANNOT_RUN_FROM_DISK_WITHOUT_DISK")); @@ -353,7 +354,8 @@ final VM vm = new VM(); vm.setstatus(VMStatus.Up); doReturn(vm).when(command).getVm(); - doReturn(new VdsSelector(vm, new NGuid(), true)).when(command).getVdsSelector(); + doReturn(new VdsSelector(vm, new NGuid(), true, new VdsFreeMemoryChecker(command))).when(command) + .getVdsSelector(); assertFalse(command.canRunVm()); assertTrue(command.getReturnValue().getCanDoActionMessages().contains("ACTION_TYPE_FAILED_VM_IS_RUNNING")); diff --git a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsAsyncCommand.java b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsAsyncCommand.java index 4cd4a2f..36dcfee 100644 --- a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsAsyncCommand.java +++ b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsAsyncCommand.java @@ -15,4 +15,6 @@ * Assures the Job/Step are completed. */ void reportCompleted(); + + void onPowerringUp(); } diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java index 8c63fb8..a074e2e 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java @@ -23,7 +23,6 @@ import org.ovirt.engine.core.common.vdscommands.SetVdsStatusVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; import org.ovirt.engine.core.common.vdscommands.VdsIdAndVdsVDSCommandParametersBase; -import org.ovirt.engine.core.compat.DateTime; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.compat.KeyValuePairCompat; import org.ovirt.engine.core.compat.TransactionScopeOption; @@ -51,6 +50,9 @@ public class VdsManager { private VDS _vds; + private long lastUpdate = System.currentTimeMillis(); + + private static Log log = LogFactory.getLog(VdsManager.class); protected VDS getVds() { return _vds; @@ -90,8 +92,6 @@ } } } - - public DateTime mLastUpdate = DateTime.getNow(); private final AtomicInteger mFailedToRunVmAttempts; private final AtomicInteger mUnrespondedAttempts; @@ -233,7 +233,7 @@ _vdsUpdater = new VdsUpdateRunTimeInfo(VdsManager.this, _vds); _vdsUpdater.Refresh(); mUnrespondedAttempts.set(0); - mLastUpdate = DateTime.getNow(); + lastUpdate = System.currentTimeMillis(); } if (!getInitialized() && getVds().getstatus() != VDSStatus.NonResponsive && getVds().getstatus() != VDSStatus.PendingApproval) { @@ -598,8 +598,7 @@ public boolean handleNetworkException(VDSNetworkException ex, VDS vds) { if (vds.getstatus() != VDSStatus.Down) { if (mUnrespondedAttempts.get() < Config.<Integer> GetValue(ConfigValues.VDSAttemptsToResetCount) - || mLastUpdate.AddSeconds(Config.<Integer> GetValue(ConfigValues.TimeoutToResetVdsInSeconds)) - .compareTo(new java.util.Date()) > 0) { + || lastUpdate + (Config.<Integer> GetValue(ConfigValues.TimeoutToResetVdsInSeconds) * 1000) > System.currentTimeMillis()) { boolean result = false; if (vds.getstatus() != VDSStatus.Connecting && vds.getstatus() != VDSStatus.PreparingForMaintenance && vds.getstatus() != VDSStatus.NonResponsive) { @@ -668,7 +667,5 @@ public boolean isSetNonOperationalExecuted() { return isSetNonOperationalExecuted; } - - private static Log log = LogFactory.getLog(VdsManager.class); } -- To view, visit http://gerrit.ovirt.org/7204 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I076ede6cba919bc61f7546d7b29ef436eb6d3375 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: master Gerrit-Owner: Roy Golan <rgo...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches