Roy Golan has uploaded a new change for review. Change subject: core: Detect host in idle prep for maintenance and re-migrate ......................................................................
core: Detect host in idle prep for maintenance and re-migrate A host going to Maintenance can have ongoing incoming migrations before it changes its status to Preparing for Maintenance. This host will migrate all its VMs except the incoming ones, which are not yet its own. This results in the host staying in PreparingForMaint until manual intervention. The solution has 3 parts: 1. cancel all incoming migrations 2. make the cancel migration command trigger a rerun of the migration automatically 3. detect if the host is idle for a long time in PreparingForMaint and migrate VMs out Bug-Url: https://bugzilla.redhat.com/966503 Change-Id: I0e18bcfbc566b6fd92d276c5c739ba607e2a53f0 Signed-off-by: Roy Golan <rgo...@redhat.com> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/CancelMigrateVmCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MaintenanceNumberOfVdssCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java M backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java M backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrateVDSCommand.java A backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrationVDSParameters.java M packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql M packaging/dbscripts/vms_sp.sql M packaging/etc/engine-config/engine-config.properties 13 files changed, 100 insertions(+), 22 deletions(-) git pull 
ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/99/20899/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/CancelMigrateVmCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/CancelMigrateVmCommand.java index 2abc4ee..03e2f23 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/CancelMigrateVmCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/CancelMigrateVmCommand.java @@ -6,7 +6,7 @@ import org.ovirt.engine.core.common.errors.VdcBllMessages; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; import org.ovirt.engine.core.common.vdscommands.VDSReturnValue; -import org.ovirt.engine.core.common.vdscommands.VdsAndVmIDVDSParametersBase; +import org.ovirt.engine.core.vdsbroker.vdsbroker.CancelMigrationVDSParameters; public class CancelMigrateVmCommand<T extends VmOperationParameterBase> extends VmCommand<T> { @@ -20,8 +20,8 @@ .getResourceManager() .RunVdsCommand( VDSCommandType.CancelMigrate, - new VdsAndVmIDVDSParametersBase(getVm().getRunOnVds(), - getParameters().getVmId())); + new CancelMigrationVDSParameters(getVm().getRunOnVds(), + getParameters().getVmId(), false)); setSucceeded(retVal.getSucceeded()); } diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MaintenanceNumberOfVdssCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MaintenanceNumberOfVdssCommand.java index 1df7a74..035f8a0 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MaintenanceNumberOfVdssCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MaintenanceNumberOfVdssCommand.java @@ -23,15 +23,17 @@ import org.ovirt.engine.core.common.businessentities.VDSGroup; import org.ovirt.engine.core.common.businessentities.VDSStatus; import org.ovirt.engine.core.common.businessentities.VM; +import 
org.ovirt.engine.core.common.businessentities.VMStatus; import org.ovirt.engine.core.common.businessentities.VdsSpmStatus; import org.ovirt.engine.core.common.businessentities.network.Network; -import org.ovirt.engine.core.common.locks.LockingGroup; import org.ovirt.engine.core.common.errors.VdcBllMessages; +import org.ovirt.engine.core.common.locks.LockingGroup; import org.ovirt.engine.core.common.utils.Pair; import org.ovirt.engine.core.common.vdscommands.SetVdsStatusVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.dal.dbbroker.DbFacade; +import org.ovirt.engine.core.vdsbroker.vdsbroker.CancelMigrationVDSParameters; @InternalCommandAttribute @NonTransactiveCommandAttribute @@ -64,9 +66,21 @@ for (VDS vds : spms) { setVdsStatusToPrepareForMaintaice(vds); } + cancelIncommingMigrations(); freeLock(); } + private void cancelIncommingMigrations() { + for (Guid hostId :vdssToMaintenance.keySet()) { + for (VM vm : getVmDAO().getAllMigratingToHost(hostId)) { + if (vm.getStatus() == VMStatus.MigratingFrom) { + log.infoFormat("Cancelling incoming migration of {0} id: {1}", vm, vm.getId()); + runVdsCommand(VDSCommandType.CancelMigrate, new CancelMigrationVDSParameters(vm.getRunOnVds(), vm.getId(), true)); + } + } + } + } + private void setVdsStatusToPrepareForMaintaice(VDS vds) { if (vds.getStatus() != VDSStatus.PreparingForMaintenance && vds.getStatus() != VDSStatus.NonResponsive && vds.getStatus() != VDSStatus.Down) { diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java index 4f7ccff..b784dc6 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java @@ -1,6 +1,7 @@ package 
org.ovirt.engine.core.bll; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; @@ -18,6 +19,7 @@ import org.ovirt.engine.core.common.action.AddVmFromScratchParameters; import org.ovirt.engine.core.common.action.FenceVdsActionParameters; import org.ovirt.engine.core.common.action.HostStoragePoolParametersBase; +import org.ovirt.engine.core.common.action.MaintenanceNumberOfVdssParameters; import org.ovirt.engine.core.common.action.MigrateVmToServerParameters; import org.ovirt.engine.core.common.action.ReconstructMasterParameters; import org.ovirt.engine.core.common.action.RunVmParams; @@ -317,6 +319,13 @@ } @Override + public void handleVdsMaintenanceTimeout(final VDS vds) { + // try to put the host to Maintenance again. + Backend.getInstance().runInternalAction(VdcActionType.MaintenanceNumberOfVdss, + new MaintenanceNumberOfVdssParameters(Arrays.asList(vds.getId()), true)); + } + + @Override public void rerun(Guid vmId) { final IVdsAsyncCommand command = Backend.getInstance().getResourceManager().GetAsyncCommandForVm(vmId); if (command != null) { diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java index 17a1eed..27500a2 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java @@ -94,7 +94,7 @@ } private void MoveVMsToUnknown() { - getVmList().addAll(getVmDAO().getAllMigratingToHost(getVdsId())); + addMigratedVmsNotUpYet(); for (VM vm : getVmList()) { DestroyVmOnDestination(vm); Backend.getInstance() @@ -108,6 +108,16 @@ } } + private void addMigratedVmsNotUpYet() { + for (VM incomingVm : getVmDAO().getAllMigratingToHost(getVdsId())) { + if (incomingVm.getStatus() == 
VMStatus.MigratingTo) { + // this VM is finished the migration handover and is running on this host now + // and should be treated as well. + getVmList().add(incomingVm); + } + } + } + @Override public Map<String, String> getJobMessageProperties() { if (jobProperties == null) { diff --git a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java index db8093f..b4b23c8 100644 --- a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java +++ b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java @@ -60,4 +60,6 @@ boolean restartVds(Guid vdsId); void addExternallyManagedVms(List<VmStatic> externalVmList); + + void handleVdsMaintenanceTimeout(VDS vds); } diff --git a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java index b43410a..56aebf1 100644 --- a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java +++ b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java @@ -1533,6 +1533,10 @@ @DefaultValueAttribute("500") GlanceImageTotalListSize(543), + @TypeConverterAttribute(Integer.class) + @DefaultValueAttribute("300") + HostPreparingForMaintenanceIdleTime(544), + Invalid(65535); private int intValue; @@ -1553,7 +1557,4 @@ return intValue; } - public static ConfigValues forValue(int value) { - return mappings.get(value); - } } diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java index 
c14b395..fd1fabd 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java @@ -57,6 +57,7 @@ private VDS _vds; private long lastUpdate; private long updateStartTime; + private long nextMaintenanceAttemptTime; private static Log log = LogFactory.getLog(VdsManager.class); @@ -432,6 +433,10 @@ vds.setPreviousStatus(vds.getStatus()); if (_vds != null) { _vds.setPreviousStatus(vds.getStatus()); + if (_vds.getStatus() == VDSStatus.PreparingForMaintenance) { + calculateNextMaintenanceAttemptTime(); + } + } } // update to new status @@ -739,4 +744,13 @@ setStatus(VDSStatus.Connecting, vds); UpdateDynamicData(vds.getDynamicData()); } + + public void calculateNextMaintenanceAttemptTime() { + this.nextMaintenanceAttemptTime = System.currentTimeMillis() + TimeUnit.MILLISECONDS.convert( + Config.<Integer>GetValue(ConfigValues.HostPreparingForMaintenanceIdleTime), TimeUnit.SECONDS); + } + + public boolean isTimeToRetryMaintenance() { + return System.currentTimeMillis() > nextMaintenanceAttemptTime; + } } diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java index c9501d5..0e88d9e 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java @@ -1856,15 +1856,21 @@ } private void moveVDSToMaintenanceIfNeeded() { - if ((_vds.getStatus() == VDSStatus.PreparingForMaintenance) - && monitoringStrategy.canMoveToMaintenance(_vds)) { - _vdsManager.setStatus(VDSStatus.Maintenance, _vds); - _saveVdsDynamic = true; - _saveVdsStatistics = true; - log.infoFormat( - "Updated vds status from 'Preparing for 
Maintenance' to 'Maintenance' in database, vds = {0} : {1}", - _vds.getId(), - _vds.getName()); + if (_vds.getStatus() == VDSStatus.PreparingForMaintenance) { + if (monitoringStrategy.canMoveToMaintenance(_vds)) { + _vdsManager.setStatus(VDSStatus.Maintenance, _vds); + _saveVdsDynamic = true; + _saveVdsStatistics = true; + log.infoFormat( + "Updated vds status from 'Preparing for Maintenance' to 'Maintenance' in database, vds = {0} : {1}", + _vds.getId(), + _vds.getName()); + } else { + if (_vdsManager.isTimeToRetryMaintenance()) { + ResourceManager.getInstance().getEventListener().handleVdsMaintenanceTimeout(_vds); + _vdsManager.calculateNextMaintenanceAttemptTime(); + } + } } } diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrateVDSCommand.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrateVDSCommand.java index a080c98..b0a209b 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrateVDSCommand.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrateVDSCommand.java @@ -2,11 +2,10 @@ import org.ovirt.engine.core.common.errors.VDSError; import org.ovirt.engine.core.common.errors.VdcBllErrors; -import org.ovirt.engine.core.common.vdscommands.VdsAndVmIDVDSParametersBase; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.vdsbroker.ResourceManager; -public class CancelMigrateVDSCommand<P extends VdsAndVmIDVDSParametersBase> extends VdsBrokerCommand<P> { +public class CancelMigrateVDSCommand<P extends CancelMigrationVDSParameters> extends VdsBrokerCommand<P> { public CancelMigrateVDSCommand(P parameters) { super(parameters); } @@ -16,7 +15,9 @@ Guid vmId = getParameters().getVmId(); status = getBroker().migrateCancel(vmId.toString()); proceedProxyReturnValue(); - 
ResourceManager.getInstance().RemoveAsyncRunningVm(vmId); + if (!getParameters().isRerunAfterCancel()) { + ResourceManager.getInstance().RemoveAsyncRunningVm(vmId); + } } /** diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrationVDSParameters.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrationVDSParameters.java new file mode 100644 index 0000000..a9e7889 --- /dev/null +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrationVDSParameters.java @@ -0,0 +1,18 @@ +package org.ovirt.engine.core.vdsbroker.vdsbroker; + +import org.ovirt.engine.core.common.vdscommands.VdsAndVmIDVDSParametersBase; +import org.ovirt.engine.core.compat.Guid; + +public class CancelMigrationVDSParameters extends VdsAndVmIDVDSParametersBase { + + private boolean rerunAfterCancel; + + public CancelMigrationVDSParameters(Guid vdsId, Guid vmId, boolean rerunAfterCancel) { + super(vdsId, vmId); + this.rerunAfterCancel = rerunAfterCancel; + } + + public boolean isRerunAfterCancel() { + return rerunAfterCancel; + } +} diff --git a/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql b/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql index 1c890db..943161e 100644 --- a/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql +++ b/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql @@ -157,6 +157,7 @@ select fn_db_add_config_value('HardwareInfoEnabled','true','3.3'); select fn_db_add_config_value('HighUtilizationForEvenlyDistribute','75','general'); select fn_db_add_config_value('HighUtilizationForPowerSave','75','general'); +select fn_db_add_config_value('HostPreparingForMaintenanceIdleTime', '300', 'general'); select fn_db_add_config_value('HostTimeDriftInSec','300','general'); select fn_db_add_config_value('HotPlugEnabled','false','3.0'); select fn_db_add_config_value('HotPlugEnabled','true','3.1'); diff --git 
a/packaging/dbscripts/vms_sp.sql b/packaging/dbscripts/vms_sp.sql index 38f26cd..7225dd9 100644 --- a/packaging/dbscripts/vms_sp.sql +++ b/packaging/dbscripts/vms_sp.sql @@ -861,8 +861,7 @@ BEGIN RETURN QUERY SELECT DISTINCT vms.* FROM vms - WHERE migrating_to_vds = v_vds_id - AND status = 6; + WHERE migrating_to_vds = v_vds_id; END; $procedure$ LANGUAGE plpgsql; diff --git a/packaging/etc/engine-config/engine-config.properties b/packaging/etc/engine-config/engine-config.properties index 8db5a49..57be3f9 100644 --- a/packaging/etc/engine-config/engine-config.properties +++ b/packaging/etc/engine-config/engine-config.properties @@ -340,3 +340,6 @@ ExternalSchedulerResponseTimeout.type=Integer DwhHeartBeatInterval.description="Interval after which DWH is informed that engine is running (in seconds)" DwhHeartBeatInterval.type=Integer +HostPreparingForMaintenanceIdleTime.type=Integer +HostPreparingForMaintenanceIdleTime.description="Time to wait in seconds to determine if host is idling in status PreparingForMaintenace. When that interval is met it triggers another attempt to move the host to Maintenance" + -- To view, visit http://gerrit.ovirt.org/20899 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I0e18bcfbc566b6fd92d276c5c739ba607e2a53f0 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: ovirt-engine-3.3 Gerrit-Owner: Roy Golan <rgo...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches