Roy Golan has uploaded a new change for review.

Change subject: core: Detect host in idle prep for maintenance and re-migrate
......................................................................

core: Detect host in idle prep for maintenance and re-migrate

A host going to Maintenance can have ongoing incoming migrations before
it changes its status to Preparing for Maintenance.

This host will migrate all of its VMs except the incoming ones, which
are not its own yet. This will result in the host staying in
PreparingForMaint until manual intervention.

The solution has 3 parts:

1. cancel all incoming migrations
2. make cancel migration command trigger a rerun of the migration
automatically
3. detect if the host has been idle for a long time in PreparingForMaint
and migrate its VMs out

Bug-Url: https://bugzilla.redhat.com/966503
Change-Id: I0e18bcfbc566b6fd92d276c5c739ba607e2a53f0
Signed-off-by: Roy Golan <rgo...@redhat.com>
---
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/CancelMigrateVmCommand.java
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MaintenanceNumberOfVdssCommand.java
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
M 
backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java
M 
backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrateVDSCommand.java
A 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrationVDSParameters.java
M packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql
M packaging/dbscripts/vms_sp.sql
M packaging/etc/engine-config/engine-config.properties
13 files changed, 100 insertions(+), 22 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/99/20899/1

diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/CancelMigrateVmCommand.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/CancelMigrateVmCommand.java
index 2abc4ee..03e2f23 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/CancelMigrateVmCommand.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/CancelMigrateVmCommand.java
@@ -6,7 +6,7 @@
 import org.ovirt.engine.core.common.errors.VdcBllMessages;
 import org.ovirt.engine.core.common.vdscommands.VDSCommandType;
 import org.ovirt.engine.core.common.vdscommands.VDSReturnValue;
-import org.ovirt.engine.core.common.vdscommands.VdsAndVmIDVDSParametersBase;
+import org.ovirt.engine.core.vdsbroker.vdsbroker.CancelMigrationVDSParameters;
 
 public class CancelMigrateVmCommand<T extends VmOperationParameterBase> 
extends VmCommand<T> {
 
@@ -20,8 +20,8 @@
                 .getResourceManager()
                 .RunVdsCommand(
                         VDSCommandType.CancelMigrate,
-                        new VdsAndVmIDVDSParametersBase(getVm().getRunOnVds(),
-                                getParameters().getVmId()));
+                        new CancelMigrationVDSParameters(getVm().getRunOnVds(),
+                                getParameters().getVmId(), false));
 
         setSucceeded(retVal.getSucceeded());
     }
diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MaintenanceNumberOfVdssCommand.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MaintenanceNumberOfVdssCommand.java
index 1df7a74..035f8a0 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MaintenanceNumberOfVdssCommand.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/MaintenanceNumberOfVdssCommand.java
@@ -23,15 +23,17 @@
 import org.ovirt.engine.core.common.businessentities.VDSGroup;
 import org.ovirt.engine.core.common.businessentities.VDSStatus;
 import org.ovirt.engine.core.common.businessentities.VM;
+import org.ovirt.engine.core.common.businessentities.VMStatus;
 import org.ovirt.engine.core.common.businessentities.VdsSpmStatus;
 import org.ovirt.engine.core.common.businessentities.network.Network;
-import org.ovirt.engine.core.common.locks.LockingGroup;
 import org.ovirt.engine.core.common.errors.VdcBllMessages;
+import org.ovirt.engine.core.common.locks.LockingGroup;
 import org.ovirt.engine.core.common.utils.Pair;
 import 
org.ovirt.engine.core.common.vdscommands.SetVdsStatusVDSCommandParameters;
 import org.ovirt.engine.core.common.vdscommands.VDSCommandType;
 import org.ovirt.engine.core.compat.Guid;
 import org.ovirt.engine.core.dal.dbbroker.DbFacade;
+import org.ovirt.engine.core.vdsbroker.vdsbroker.CancelMigrationVDSParameters;
 
 @InternalCommandAttribute
 @NonTransactiveCommandAttribute
@@ -64,9 +66,21 @@
         for (VDS vds : spms) {
             setVdsStatusToPrepareForMaintaice(vds);
         }
+        cancelIncommingMigrations();
         freeLock();
     }
 
+    private void cancelIncommingMigrations() {
+        for (Guid hostId :vdssToMaintenance.keySet()) {
+            for (VM vm : getVmDAO().getAllMigratingToHost(hostId)) {
+                if (vm.getStatus() == VMStatus.MigratingFrom) {
+                    log.infoFormat("Cancelling incoming migration of {0} id: 
{1}", vm, vm.getId());
+                    runVdsCommand(VDSCommandType.CancelMigrate, new 
CancelMigrationVDSParameters(vm.getRunOnVds(), vm.getId(), true));
+                }
+            }
+        }
+    }
+
     private void setVdsStatusToPrepareForMaintaice(VDS vds) {
         if (vds.getStatus() != VDSStatus.PreparingForMaintenance && 
vds.getStatus() != VDSStatus.NonResponsive
                 && vds.getStatus() != VDSStatus.Down) {
diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java
index 4f7ccff..b784dc6 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java
@@ -1,6 +1,7 @@
 package org.ovirt.engine.core.bll;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 
@@ -18,6 +19,7 @@
 import org.ovirt.engine.core.common.action.AddVmFromScratchParameters;
 import org.ovirt.engine.core.common.action.FenceVdsActionParameters;
 import org.ovirt.engine.core.common.action.HostStoragePoolParametersBase;
+import org.ovirt.engine.core.common.action.MaintenanceNumberOfVdssParameters;
 import org.ovirt.engine.core.common.action.MigrateVmToServerParameters;
 import org.ovirt.engine.core.common.action.ReconstructMasterParameters;
 import org.ovirt.engine.core.common.action.RunVmParams;
@@ -317,6 +319,13 @@
     }
 
     @Override
+    public void handleVdsMaintenanceTimeout(final VDS vds) {
+        // try to put the host to Maintenance again.
+        
Backend.getInstance().runInternalAction(VdcActionType.MaintenanceNumberOfVdss,
+                new 
MaintenanceNumberOfVdssParameters(Arrays.asList(vds.getId()), true));
+    }
+
+    @Override
     public void rerun(Guid vmId) {
         final IVdsAsyncCommand command = 
Backend.getInstance().getResourceManager().GetAsyncCommandForVm(vmId);
         if (command != null) {
diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
index 17a1eed..27500a2 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java
@@ -94,7 +94,7 @@
     }
 
     private void MoveVMsToUnknown() {
-        getVmList().addAll(getVmDAO().getAllMigratingToHost(getVdsId()));
+        addMigratedVmsNotUpYet();
         for (VM vm : getVmList()) {
             DestroyVmOnDestination(vm);
             Backend.getInstance()
@@ -108,6 +108,16 @@
         }
     }
 
+    private void addMigratedVmsNotUpYet() {
+        for (VM incomingVm : getVmDAO().getAllMigratingToHost(getVdsId())) {
+            if (incomingVm.getStatus() == VMStatus.MigratingTo) {
+                // this VM is finished the migration handover and is running 
on this host now
+                // and should be treated as well.
+                getVmList().add(incomingVm);
+            }
+        }
+    }
+
     @Override
     public Map<String, String> getJobMessageProperties() {
         if (jobProperties == null) {
diff --git 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java
 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java
index db8093f..b4b23c8 100644
--- 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java
+++ 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java
@@ -60,4 +60,6 @@
     boolean restartVds(Guid vdsId);
 
     void addExternallyManagedVms(List<VmStatic> externalVmList);
+
+    void handleVdsMaintenanceTimeout(VDS vds);
 }
diff --git 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
index b43410a..56aebf1 100644
--- 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
+++ 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
@@ -1533,6 +1533,10 @@
     @DefaultValueAttribute("500")
     GlanceImageTotalListSize(543),
 
+    @TypeConverterAttribute(Integer.class)
+    @DefaultValueAttribute("300")
+    HostPreparingForMaintenanceIdleTime(544),
+
     Invalid(65535);
 
     private int intValue;
@@ -1553,7 +1557,4 @@
         return intValue;
     }
 
-    public static ConfigValues forValue(int value) {
-        return mappings.get(value);
-    }
 }
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
index c14b395..fd1fabd 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java
@@ -57,6 +57,7 @@
     private VDS _vds;
     private long lastUpdate;
     private long updateStartTime;
+    private long nextMaintenanceAttemptTime;
 
     private static Log log = LogFactory.getLog(VdsManager.class);
 
@@ -432,6 +433,10 @@
                 vds.setPreviousStatus(vds.getStatus());
                 if (_vds != null) {
                     _vds.setPreviousStatus(vds.getStatus());
+                    if (_vds.getStatus() == VDSStatus.PreparingForMaintenance) 
{
+                        calculateNextMaintenanceAttemptTime();
+                    }
+
                 }
             }
             // update to new status
@@ -739,4 +744,13 @@
         setStatus(VDSStatus.Connecting, vds);
         UpdateDynamicData(vds.getDynamicData());
     }
+
+    public void calculateNextMaintenanceAttemptTime() {
+        this.nextMaintenanceAttemptTime = System.currentTimeMillis() + 
TimeUnit.MILLISECONDS.convert(
+                
Config.<Integer>GetValue(ConfigValues.HostPreparingForMaintenanceIdleTime), 
TimeUnit.SECONDS);
+    }
+
+    public boolean isTimeToRetryMaintenance() {
+        return System.currentTimeMillis() > nextMaintenanceAttemptTime;
+    }
 }
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java
index c9501d5..0e88d9e 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsUpdateRunTimeInfo.java
@@ -1856,15 +1856,21 @@
     }
 
     private void moveVDSToMaintenanceIfNeeded() {
-        if ((_vds.getStatus() == VDSStatus.PreparingForMaintenance)
-                && monitoringStrategy.canMoveToMaintenance(_vds)) {
-            _vdsManager.setStatus(VDSStatus.Maintenance, _vds);
-            _saveVdsDynamic = true;
-            _saveVdsStatistics = true;
-            log.infoFormat(
-                    "Updated vds status from 'Preparing for Maintenance' to 
'Maintenance' in database,  vds = {0} : {1}",
-                    _vds.getId(),
-                    _vds.getName());
+        if (_vds.getStatus() == VDSStatus.PreparingForMaintenance) {
+            if (monitoringStrategy.canMoveToMaintenance(_vds)) {
+                _vdsManager.setStatus(VDSStatus.Maintenance, _vds);
+                _saveVdsDynamic = true;
+                _saveVdsStatistics = true;
+                log.infoFormat(
+                        "Updated vds status from 'Preparing for Maintenance' 
to 'Maintenance' in database,  vds = {0} : {1}",
+                        _vds.getId(),
+                        _vds.getName());
+            } else {
+                if (_vdsManager.isTimeToRetryMaintenance()) {
+                    
ResourceManager.getInstance().getEventListener().handleVdsMaintenanceTimeout(_vds);
+                    _vdsManager.calculateNextMaintenanceAttemptTime();
+                }
+            }
         }
     }
 
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrateVDSCommand.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrateVDSCommand.java
index a080c98..b0a209b 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrateVDSCommand.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrateVDSCommand.java
@@ -2,11 +2,10 @@
 
 import org.ovirt.engine.core.common.errors.VDSError;
 import org.ovirt.engine.core.common.errors.VdcBllErrors;
-import org.ovirt.engine.core.common.vdscommands.VdsAndVmIDVDSParametersBase;
 import org.ovirt.engine.core.compat.Guid;
 import org.ovirt.engine.core.vdsbroker.ResourceManager;
 
-public class CancelMigrateVDSCommand<P extends VdsAndVmIDVDSParametersBase> 
extends VdsBrokerCommand<P> {
+public class CancelMigrateVDSCommand<P extends CancelMigrationVDSParameters> 
extends VdsBrokerCommand<P> {
     public CancelMigrateVDSCommand(P parameters) {
         super(parameters);
     }
@@ -16,7 +15,9 @@
         Guid vmId = getParameters().getVmId();
         status = getBroker().migrateCancel(vmId.toString());
         proceedProxyReturnValue();
-        ResourceManager.getInstance().RemoveAsyncRunningVm(vmId);
+        if (!getParameters().isRerunAfterCancel()) {
+            ResourceManager.getInstance().RemoveAsyncRunningVm(vmId);
+        }
     }
 
     /**
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrationVDSParameters.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrationVDSParameters.java
new file mode 100644
index 0000000..a9e7889
--- /dev/null
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/vdsbroker/CancelMigrationVDSParameters.java
@@ -0,0 +1,18 @@
+package org.ovirt.engine.core.vdsbroker.vdsbroker;
+
+import org.ovirt.engine.core.common.vdscommands.VdsAndVmIDVDSParametersBase;
+import org.ovirt.engine.core.compat.Guid;
+
+public class CancelMigrationVDSParameters extends VdsAndVmIDVDSParametersBase {
+
+    private boolean rerunAfterCancel;
+
+    public CancelMigrationVDSParameters(Guid vdsId, Guid vmId, boolean 
rerunAfterCancel) {
+        super(vdsId, vmId);
+        this.rerunAfterCancel = rerunAfterCancel;
+    }
+
+    public boolean isRerunAfterCancel() {
+        return rerunAfterCancel;
+    }
+}
diff --git a/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql 
b/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql
index 1c890db..943161e 100644
--- a/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql
+++ b/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql
@@ -157,6 +157,7 @@
 select fn_db_add_config_value('HardwareInfoEnabled','true','3.3');
 select 
fn_db_add_config_value('HighUtilizationForEvenlyDistribute','75','general');
 select fn_db_add_config_value('HighUtilizationForPowerSave','75','general');
+select fn_db_add_config_value('HostPreparingForMaintenanceIdleTime', '300', 
'general');
 select fn_db_add_config_value('HostTimeDriftInSec','300','general');
 select fn_db_add_config_value('HotPlugEnabled','false','3.0');
 select fn_db_add_config_value('HotPlugEnabled','true','3.1');
diff --git a/packaging/dbscripts/vms_sp.sql b/packaging/dbscripts/vms_sp.sql
index 38f26cd..7225dd9 100644
--- a/packaging/dbscripts/vms_sp.sql
+++ b/packaging/dbscripts/vms_sp.sql
@@ -861,8 +861,7 @@
 BEGIN
 RETURN QUERY SELECT DISTINCT vms.*
    FROM vms
-   WHERE migrating_to_vds = v_vds_id
-   AND status = 6;
+   WHERE migrating_to_vds = v_vds_id;
 
 END; $procedure$
 LANGUAGE plpgsql;
diff --git a/packaging/etc/engine-config/engine-config.properties 
b/packaging/etc/engine-config/engine-config.properties
index 8db5a49..57be3f9 100644
--- a/packaging/etc/engine-config/engine-config.properties
+++ b/packaging/etc/engine-config/engine-config.properties
@@ -340,3 +340,6 @@
 ExternalSchedulerResponseTimeout.type=Integer
 DwhHeartBeatInterval.description="Interval after which DWH is informed that 
engine is running (in seconds)"
 DwhHeartBeatInterval.type=Integer
+HostPreparingForMaintenanceIdleTime.type=Integer
+HostPreparingForMaintenanceIdleTime.description="Time to wait in seconds to 
determine if host is idling in status PreparingForMaintenace. When that 
interval is met it triggers another attempt to move the host to Maintenance"
+


-- 
To view, visit http://gerrit.ovirt.org/20899
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0e18bcfbc566b6fd92d276c5c739ba607e2a53f0
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: ovirt-engine-3.3
Gerrit-Owner: Roy Golan <rgo...@redhat.com>
_______________________________________________
Engine-patches mailing list
Engine-patches@ovirt.org
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to