Eli Mesika has uploaded a new change for review.

Change subject: core: handle fence agent power wait param on stop
......................................................................

core: handle fence agent power wait param on stop

When a host restart is done manually or as a result of
non-responsive host treatment, and the power wait parameter is used,
the host may stay in 'off' state and even release its lock on HA
VMs before the host is really down.

This is the scenario:
1) A restart command is issued and actually performed as
   stop -> wait for 'off' status -> start -> wait for 'on' status
2) The power wait parameter is added to the command implicitly or explicitly,
giving a delay of X seconds before the operation is actually performed
3) The fence agent script returns immediately with 'off' status
4) An 'on' command is sent to the fence agent by the start operation
5) X seconds pass and the host is actually shut down

This patch handles this by adding a new configuration value that maps
fence agents to the name of the parameter for power wait

Upon a stop operation, we wait a fixed delay (5 seconds, hard-coded)
before starting to sample the host status. If the power wait parameter is
used, its value X is extracted and we wait for 5 + X seconds instead.

Change-Id: I310e076ecf84988cacd0b179954d2460d7988b91
Bug-Url: https://bugzilla.redhat.com/show_bug.cgi?id=1114618
Signed-off-by: Eli Mesika <emes...@redhat.com>
---
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/FenceVdsBaseCommand.java
M 
backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
M 
backend/manager/modules/utils/src/main/java/org/ovirt/engine/core/utils/pm/VdsFenceOptions.java
M packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql
4 files changed, 69 insertions(+), 1 deletion(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/26/29426/1

diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/FenceVdsBaseCommand.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/FenceVdsBaseCommand.java
index 7195328..866a64f 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/FenceVdsBaseCommand.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/FenceVdsBaseCommand.java
@@ -2,12 +2,14 @@
 
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorCompletionService;
 import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.commons.lang.StringUtils;
 import org.ovirt.engine.core.bll.job.ExecutionHandler;
@@ -21,6 +23,7 @@
 import org.ovirt.engine.core.common.businessentities.VDSStatus;
 import org.ovirt.engine.core.common.businessentities.VM;
 import org.ovirt.engine.core.common.businessentities.VMStatus;
+import org.ovirt.engine.core.common.businessentities.VdsStatic;
 import org.ovirt.engine.core.common.businessentities.VmExitStatus;
 import org.ovirt.engine.core.common.config.Config;
 import org.ovirt.engine.core.common.config.ConfigValues;
@@ -39,6 +42,7 @@
 import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogDirector;
 import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogableBase;
 import org.ovirt.engine.core.utils.ThreadUtils;
+import org.ovirt.engine.core.utils.pm.VdsFenceOptions;
 import org.ovirt.engine.core.utils.threadpool.ThreadPoolUtil;
 
 public abstract class FenceVdsBaseCommand<T extends FenceVdsActionParameters> 
extends VdsCommand<T> {
@@ -499,6 +503,36 @@
         setVmName(null);
     }
 
+    private int getSleep(FenceActionType actionType, FenceAgentOrder order) {
+        if (actionType != FenceActionType.Stop) {
+            return SLEEP_BEFORE_FIRST_ATTEMPT;
+        }
+        // We have to find out if power off delay was used and add this to the 
wait time
+        // since otherwise the command will return immediately with 'off' 
status and
+        // subsequent 'on' command issued during this delay will be overridden 
by the actual shutdown
+        String agent = (order == FenceAgentOrder.Primary) ? 
getVds().getPmType() : getVds().getPmSecondaryType();
+        String options =  (order == FenceAgentOrder.Primary) ? 
getVds().getPmOptions() : getVds().getPmSecondaryOptions();
+        options = VdsFenceOptions.getDefaultAgentOptions(agent, options);
+        HashMap<String, String> optionsMap = 
VdsStatic.pmOptionsStringToMap(options);
+        String powerWaitParamSettings = 
Config.getValue(ConfigValues.FencePowerWaitParam);
+        String powerWaitParam = VdsFenceOptions.getAgentPowerWaitParam(agent, 
powerWaitParamSettings);
+        if (powerWaitParam == null) {
+            // no power wait for this agent
+            return SLEEP_BEFORE_FIRST_ATTEMPT;
+        }
+        if (optionsMap.containsKey(powerWaitParam)) {
+            try {
+                Integer powerWaitValueInSec = 
Integer.parseInt(optionsMap.get(powerWaitParam));
+                return SLEEP_BEFORE_FIRST_ATTEMPT + (int) 
TimeUnit.SECONDS.toMillis(powerWaitValueInSec);
+            }
+            catch(NumberFormatException nfe) {
+                // illegal value
+                return SLEEP_BEFORE_FIRST_ATTEMPT;
+            }
+        }
+        return SLEEP_BEFORE_FIRST_ATTEMPT;
+    }
+
     protected void setStatus() {
         Backend.getInstance()
                 .getResourceManager()
@@ -524,11 +558,12 @@
         int i = 1;
         boolean statusReached = false;
         log.infoFormat("Waiting for vds {0} to {1}", vdsName, ACTION_NAME);
+
         // Waiting before first attempt to check the host status.
         // This is done because if we will attempt to get host status 
immediately
         // in most cases it will not turn from on/off to off/on and we will 
need
         // to wait a full cycle for it.
-        ThreadUtils.sleep(SLEEP_BEFORE_FIRST_ATTEMPT);
+        ThreadUtils.sleep(getSleep(actionType, order));
         while (!statusReached && i <= getRerties()) {
             log.infoFormat("Attempt {0} to get vds {1} status", i, vdsName);
             if (executor.findProxyHost()) {
diff --git 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
index bbdccee..720af50 100644
--- 
a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
+++ 
b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/config/ConfigValues.java
@@ -1651,5 +1651,12 @@
     @DefaultValueAttribute("true")
     IscsiMultipathingSupported,
 
+    /**
+     * Defines the parameter name used by the agent script to delay host on/off
+     */
+    @TypeConverterAttribute(String.class)
+    
@DefaultValueAttribute("apc=power_wait,apc_snmp=power_wait,bladecenter=power_wait,cisco_ucs=power_wait,drac5=power_wait,drac7=power_wait,eps=delay,hpblade=power_wait,ilo=power_wait,ilo2=power_wait,ilo3=power_wait,ilo4=power_wait,ipmilan=power_wait,rsa=power_wait,rsb=power_wait,wti=power_wait")
+    FencePowerWaitParam,
+
     Invalid;
 }
diff --git 
a/backend/manager/modules/utils/src/main/java/org/ovirt/engine/core/utils/pm/VdsFenceOptions.java
 
b/backend/manager/modules/utils/src/main/java/org/ovirt/engine/core/utils/pm/VdsFenceOptions.java
index b21f91f..b84f643 100644
--- 
a/backend/manager/modules/utils/src/main/java/org/ovirt/engine/core/utils/pm/VdsFenceOptions.java
+++ 
b/backend/manager/modules/utils/src/main/java/org/ovirt/engine/core/utils/pm/VdsFenceOptions.java
@@ -289,6 +289,31 @@
         return realAgent;
     }
 
+
+    /**
+     * handles agent power wait parameter mapping
+     * @param agent
+     * @param powerWait
+     * @return
+     */
+    public static String getAgentPowerWaitParam(String agent, String 
powerWait) {
+        String param = null;
+        // result has the format [<agent>=<power wait param name>[,]]*
+        String[] settings = powerWait.split(Pattern.quote(COMMA), -1);
+        if (settings.length > 0) {
+            for (String setting : settings) {
+                String[] pair = setting.split(Pattern.quote(EQUAL), -1);
+                if (pair.length == 2) {
+                    if (agent.equalsIgnoreCase(pair[0])) {
+                        param = pair[1];
+                        break;
+                    }
+                }
+            }
+        }
+        return param;
+    }
+
     /**
      * handles agent default options
      *
diff --git a/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql 
b/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql
index 159d395..6d22d7a 100644
--- a/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql
+++ b/packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql
@@ -111,6 +111,7 @@
 select fn_db_add_config_value('FenceStartStatusRetries','3','general');
 select 
fn_db_add_config_value('FenceStopStatusDelayBetweenRetriesInSec','60','general');
 select fn_db_add_config_value('FenceStopStatusRetries','3','general');
+select 
fn_db_add_config_value('FencePowerWaitParam','apc=power_wait,apc_snmp=power_wait,bladecenter=power_wait,cisco_ucs=power_wait,drac5=power_wait,drac7=power_wait,eps=delay,hpblade=power_wait,ilo=power_wait,ilo2=power_wait,ilo3=power_wait,ilo4=power_wait,ipmilan=power_wait,rsa=power_wait,rsb=power_wait,wti=power_wait','general');
 select fn_db_add_config_value('FilteringLUNsEnabled','true','3.0');
 select 
fn_db_add_config_value('FindFenceProxyDelayBetweenRetriesInSec','30','general');
 select fn_db_add_config_value('FindFenceProxyRetries','3','general');


-- 
To view, visit http://gerrit.ovirt.org/29426
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I310e076ecf84988cacd0b179954d2460d7988b91
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: ovirt-engine-3.4
Gerrit-Owner: Eli Mesika <emes...@redhat.com>
_______________________________________________
Engine-patches mailing list
Engine-patches@ovirt.org
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to