Martin Peřina has uploaded a new change for review. Change subject: core: Make SSH Soft Fencing part of Non Responding Treatment ......................................................................
core: Make SSH Soft Fencing part of Non Responding Treatment Makes SSH Soft Fencing execution the first step of Non Responding Treatment execution. Change-Id: I99770873470541bc7f6c3a7338f4bcc10f5e4fb3 Bug-Url: https://bugzilla.redhat.com/1182510 Signed-off-by: Martin Perina <mper...@redhat.com> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RestartVdsCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/SshSoftFencingCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java M backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java 6 files changed, 78 insertions(+), 56 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/66/36966/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RestartVdsCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RestartVdsCommand.java index 3f91eb0..888a0d0 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RestartVdsCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/RestartVdsCommand.java @@ -90,7 +90,7 @@ return canDo; } - private boolean isQuietTimeFromLastActionPassed() { + protected boolean isQuietTimeFromLastActionPassed() { // Check Quiet time between PM operations, this is done only if command is not internal. int secondsLeftToNextPmOp = (isInternalExecution()) ? 
0 : DbFacade.getInstance() diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/SshSoftFencingCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/SshSoftFencingCommand.java index 8ac6819..fd82d643 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/SshSoftFencingCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/SshSoftFencingCommand.java @@ -6,9 +6,12 @@ import org.ovirt.engine.core.bll.context.CommandContext; import org.ovirt.engine.core.bll.utils.EngineSSHClient; import org.ovirt.engine.core.common.action.VdsActionParameters; +import org.ovirt.engine.core.common.businessentities.VDSStatus; import org.ovirt.engine.core.common.config.Config; import org.ovirt.engine.core.common.config.ConfigValues; +import org.ovirt.engine.core.utils.ThreadUtils; import org.ovirt.engine.core.vdsbroker.ResourceManager; +import org.ovirt.engine.core.vdsbroker.VdsManager; /** * Tries to restart VDSM using SSH connection @@ -52,8 +55,8 @@ if (validator.shouldVdsBeFenced()) { boolean result = executeSshSoftFencingCommand(getVds().getVdsGroupCompatibilityVersion().toString()); if (result) { - // SSH Soft Fencing executed without errors, tell VdsManager about it - ResourceManager.getInstance().GetVdsManager(getVds().getId()).finishSshSoftFencingExecution(getVds()); + // SSH Soft Fencing executed successfully, check if host become Up + result = checkIfHostBecomeUp(); } getReturnValue().setSucceeded(result); } else { @@ -69,9 +72,9 @@ /** * Executes SSH Soft Fencing command * - * @param host - * host to execute SSH Soft Fencing command on - * @returns {@code true} if command has been executed successfully, {@code false} otherwise + * @param version + * cluster compatibility version to acquire correct command to restart VDSM + * @return {@code true} if command has been executed successfully, {@code false} otherwise */ private boolean executeSshSoftFencingCommand(String 
version) { boolean ret = false; @@ -109,4 +112,28 @@ } return ret; } + + /** + * Check if host become Up after successful SSH Soft Fencing execution until grace period is over + * + * @return {@code true} if host became Up during grace period, otherwise {@code false} + */ + private boolean checkIfHostBecomeUp() { + VdsManager vdsManager = getResourceManager().GetVdsManager(getVdsId()); + int sleepInterval = Config.<Integer> getValue(ConfigValues.VdsRefreshRate) * 1000; + while (vdsManager.isHostInGracePeriod(true)) { + if (vdsManager.getCopyVds().getStatus() == VDSStatus.Up) { + // host became Up during grace period + return true; + } + // wait until next host monitoring attempt + ThreadUtils.sleep(sleepInterval); + } + return false; + } + + public ResourceManager getResourceManager() { + // TODO: fix when ResourceManager could be injected + return ResourceManager.getInstance(); + } } diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java index 6185999..c7289bd 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsEventListener.java @@ -7,7 +7,6 @@ import java.util.List; import java.util.Map; import java.util.concurrent.Callable; - import javax.enterprise.event.Observes; import javax.enterprise.inject.Instance; import javax.inject.Inject; @@ -74,7 +73,6 @@ import org.ovirt.engine.core.utils.lock.EngineLock; import org.ovirt.engine.core.utils.lock.LockManagerFactory; import org.ovirt.engine.core.utils.threadpool.ThreadPoolUtil; -import org.ovirt.engine.core.vdsbroker.MonitoringStrategyFactory; import org.ovirt.engine.core.vdsbroker.ResourceManager; import org.ovirt.engine.core.vdsbroker.irsbroker.IrsBrokerCommand; import org.ovirt.engine.core.vdsbroker.vdsbroker.VDSNetworkException; @@ -236,7 +234,7 @@ } 
@Override - public void vdsNotResponding(final VDS vds, final boolean executeSshSoftFencing, final long lastUpdate) { + public void vdsNotResponding(final VDS vds, final long lastUpdate) { ExecutionHandler.updateSpecificActionJobCompleted(vds.getId(), VdcActionType.MaintenanceVds, false); ThreadPoolUtil.execute(new Runnable() { @Override @@ -245,28 +243,14 @@ vds.getId(), vds.getHostName()); - boolean shouldExecRealFencing = true; + FenceVdsActionParameters params = new FenceVdsActionParameters( + vds.getId(), + FenceActionType.Restart); + params.setLastUpdate(lastUpdate); + Backend.getInstance().runInternalAction(VdcActionType.VdsNotRespondingTreatment, + params, + ExecutionHandler.createInternalJobContext()); - if (executeSshSoftFencing) { - VdcReturnValueBase retVal = - Backend.getInstance().runInternalAction(VdcActionType.SshSoftFencing, - new VdsActionParameters(vds.getId()), - ExecutionHandler.createInternalJobContext()); - // if SSH Soft Fencing command execution was successful, do not execute - // standard fencing immediately, but wait to see if it helped - shouldExecRealFencing = !retVal.getSucceeded(); - } - - if (MonitoringStrategyFactory.getMonitoringStrategyForVds(vds).isPowerManagementSupported() - && shouldExecRealFencing) { - FenceVdsActionParameters params = new FenceVdsActionParameters( - vds.getId(), - FenceActionType.Restart); - params.setLastUpdate(lastUpdate); - Backend.getInstance().runInternalAction(VdcActionType.VdsNotRespondingTreatment, - params, - ExecutionHandler.createInternalJobContext()); - } moveBricksToUnknown(vds); } }); diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java index c56e817..6227c87 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java +++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/VdsNotRespondingTreatmentCommand.java @@ -8,6 +8,7 @@ import org.apache.commons.lang.ObjectUtils; import org.ovirt.engine.core.bll.context.CommandContext; import org.ovirt.engine.core.bll.job.ExecutionContext; +import org.ovirt.engine.core.bll.validator.FenceValidator; import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.common.VdcObjectType; import org.ovirt.engine.core.common.action.FenceVdsActionParameters; @@ -30,6 +31,7 @@ import org.ovirt.engine.core.utils.ThreadUtils; import org.ovirt.engine.core.utils.linq.LinqUtils; import org.ovirt.engine.core.utils.linq.Predicate; +import org.ovirt.engine.core.vdsbroker.MonitoringStrategyFactory; /** * @see RestartVdsCommand on why this command is requiring a lock @@ -74,6 +76,21 @@ return false; } + @Override + protected boolean canDoAction() { + FenceValidator fenceValidator = new FenceValidator(); + List<String> messages = getReturnValue().getCanDoActionMessages(); + boolean canDo = + fenceValidator.isHostExists(getVds(), messages) + && fenceValidator.isStartupTimeoutPassed(messages) + && isQuietTimeFromLastActionPassed(); + if (!canDo) { + handleError(); + } + getReturnValue().setSucceeded(canDo); + return canDo; + } + /** * Only fence the host if the VDS is down, otherwise it might have gone back up until this command was executed. If * the VDS is not fenced then don't send an audit log event. 
@@ -98,11 +115,26 @@ boolean shouldBeFenced = validator.shouldVdsBeFenced(); if (shouldBeFenced) { getParameters().setParentCommand(VdcActionType.VdsNotRespondingTreatment); - VdcReturnValueBase retVal = - runInternalAction(VdcActionType.VdsKdumpDetection, - getParameters(), - getContext()); + VdcReturnValueBase retVal; + retVal = runInternalAction(VdcActionType.SshSoftFencing, + getParameters(), + getContext()); + if (retVal.getSucceeded()) { + // SSH Soft Fencing was successful and host is Up, stop non responding treatment + getReturnValue().setSucceeded(true); + return; + } + + // proceed with non responding treatment only if PM action are allowed + if (!MonitoringStrategyFactory.getMonitoringStrategyForVds(getVds()).isPowerManagementSupported()) { + setSucceeded(false); + setCommandShouldBeLogged(false); + } + + retVal = runInternalAction(VdcActionType.VdsKdumpDetection, + getParameters(), + getContext()); if (retVal.getSucceeded()) { // kdump on host detected and finished successfully, stop hard fencing execution getReturnValue().setSucceeded(true); diff --git a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java index c30f9f6..81ed161 100644 --- a/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java +++ b/backend/manager/modules/common/src/main/java/org/ovirt/engine/core/common/businessentities/IVdsEventListener.java @@ -11,7 +11,7 @@ import org.ovirt.engine.core.compat.TransactionScopeOption; public interface IVdsEventListener { - void vdsNotResponding(VDS vds, boolean executeSshSoftFencing, long lastUpdate); // BLL + void vdsNotResponding(VDS vds, long lastUpdate); // BLL void vdsNonOperational(Guid vdsId, NonOperationalReason type, boolean logCommand, Guid domainId); // BLL diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java index 3f4f77d..47da440 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/VdsManager.java @@ -68,7 +68,6 @@ private final Object lockObj = new Object(); private final AtomicInteger mFailedToRunVmAttempts; private final AtomicInteger mUnrespondedAttempts; - private final AtomicBoolean sshSoftFencingExecuted; private final Guid vdsId; private final VdsMonitor vdsMonitor = new VdsMonitor(); private final HostNetworkTopologyPersister hostNetworkTopologyPersister; @@ -96,7 +95,6 @@ monitoringStrategy = MonitoringStrategyFactory.getMonitoringStrategyForVds(vds); mUnrespondedAttempts = new AtomicInteger(); mFailedToRunVmAttempts = new AtomicInteger(); - sshSoftFencingExecuted = new AtomicBoolean(false); monitoringLock = new EngineLock(Collections.singletonMap(vdsId.toString(), new Pair<String, String>(LockingGroup.VDS_INIT.name(), "")), null); hostNetworkTopologyPersister = HostNetworkTopologyPersisterImpl.getInstance(); @@ -205,7 +203,6 @@ hostMonitoring = new HostMonitoring(VdsManager.this, cachedVds, monitoringStrategy); hostMonitoring.refresh(); mUnrespondedAttempts.set(0); - sshSoftFencingExecuted.set(false); setLastUpdate(); } if (!isInitialized() && cachedVds.getStatus() != VDSStatus.NonResponsive @@ -585,7 +582,6 @@ */ public void succededToRunVm(Guid vmId) { mUnrespondedAttempts.set(0); - sshSoftFencingExecuted.set(false); ResourceManager.getInstance().succededToRunVm(vmId, getVdsId()); } @@ -707,7 +703,6 @@ logHostFailToResponde(ex, timeoutToFence); ResourceManager.getInstance().getEventListener().vdsNotResponding( cachedVds, - !sshSoftFencingExecuted.getAndSet(true), lastUpdate); } else { setStatus(VDSStatus.NonResponsive, 
cachedVds); @@ -828,22 +823,6 @@ */ public VdsMonitor getVdsMonitor() { return vdsMonitor; - } - - /** - * Resets counter to test VDS response and changes state to Connecting after successful SSH Soft Fencing execution. - * Changing state to Connecting tells VdsManager to monitor VDS and if VDS doesn't change state to Up, VdsManager - * will execute standard fencing after timeout interval. - * - * @param vds - * VDS that SSH Soft Fencing has been executed on - */ - public void finishSshSoftFencingExecution(VDS vds) { - // reset the unresponded counter to wait if VDSM restart helps - mUnrespondedAttempts.set(0); - // change VDS state to connecting - setStatus(VDSStatus.Connecting, vds); - updateDynamicData(vds.getDynamicData()); } public void calculateNextMaintenanceAttemptTime() { -- To view, visit http://gerrit.ovirt.org/36966 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I99770873470541bc7f6c3a7338f4bcc10f5e4fb3 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: master Gerrit-Owner: Martin Peřina <mper...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches