Sahina Bose has uploaded a new change for review. Change subject: engine: Avoid race condition between gluster peer probe and status ......................................................................
engine: Avoid race condition between gluster peer probe and status During add of a gluster host, a gluster peer probe command is executed. If a gluster peer status is also fired at the same time from GlusterSyncJob this results in the newly added host being removed from the engine database, as the gluster peer status has not yet recognized the newly added host. Added lock at cluster level before peer probe to avoid this. Change-Id: Iaa76cea4c5b5bf7c444680260dd4da5b65665428 Bug-Url: https://bugzilla.redhat.com/972619 Signed-off-by: Sahina Bose <sab...@redhat.com> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java 1 file changed, 41 insertions(+), 24 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/15/21015/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java index f9435a9..c1ea010 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java @@ -35,11 +35,14 @@ import org.ovirt.engine.core.common.businessentities.gluster.GlusterServerInfo; import org.ovirt.engine.core.common.businessentities.network.VdsNetworkInterface; import org.ovirt.engine.core.common.errors.VdcBLLException; +import org.ovirt.engine.core.common.errors.VdcBllMessages; import org.ovirt.engine.core.common.eventqueue.Event; import org.ovirt.engine.core.common.eventqueue.EventQueue; import org.ovirt.engine.core.common.eventqueue.EventResult; import org.ovirt.engine.core.common.eventqueue.EventType; import org.ovirt.engine.core.common.gluster.GlusterFeatureSupported; +import org.ovirt.engine.core.common.locks.LockingGroup; +import org.ovirt.engine.core.common.utils.Pair; import org.ovirt.engine.core.common.vdscommands.ConnectStoragePoolVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.MomPolicyVDSParameters; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; @@ -58,6 +61,7 @@ import org.ovirt.engine.core.utils.ejb.BeanProxyType; import org.ovirt.engine.core.utils.ejb.BeanType; import org.ovirt.engine.core.utils.ejb.EjbUtils; +import org.ovirt.engine.core.utils.lock.EngineLock; import org.ovirt.engine.core.vdsbroker.attestation.AttestationService; import org.ovirt.engine.core.vdsbroker.attestation.AttestationValue; import org.ovirt.engine.core.vdsbroker.irsbroker.IrsBrokerCommand; @@ -342,38 +346,51 @@ } private boolean initGlusterPeerProcess() { - glusterPeerListSucceeded = true; - glusterPeerProbeSucceeded = true; - List<VDS> vdsList = getVdsDAO().getAllForVdsGroupWithStatus(getVdsGroupId(), VDSStatus.Up); - // If the cluster already having Gluster servers, get an up server - if (vdsList != null && vdsList.size() > 0) { - VDS upServer = null; - for (VDS vds : vdsList) { - if (!getVdsId().equals(vds.getId())) { - upServer = vds; - break; + /* Acquiring a wait lock only during a gluster peer process + If "gluster peer probe" and "gluster peer status" are executed simultaneously, the results + are unpredictable. Hence locking the cluster to ensure the sync job does not lead to race + condition.*/ + Map<String, Pair<String, String>> exclusiveLocks = new HashMap<String, Pair<String, String>>(); + exclusiveLocks.put(getVds().getVdsGroupId().toString(), + LockMessagesMatchUtil.makeLockingPair(LockingGroup.GLUSTER, VdcBllMessages.ACTION_TYPE_FAILED_GLUSTER_OPERATION_INPROGRESS)); + EngineLock lock = new EngineLock(exclusiveLocks, null); + getLockManager().acquireLockWait(lock); + try { + glusterPeerListSucceeded = true; + glusterPeerProbeSucceeded = true; + List<VDS> vdsList = getVdsDAO().getAllForVdsGroupWithStatus(getVdsGroupId(), VDSStatus.Up); + // If the cluster already having Gluster servers, get an up server + if (vdsList != null && vdsList.size() > 0) { + VDS upServer = null; + for (VDS vds : vdsList) { + if (!getVdsId().equals(vds.getId())) { + upServer = vds; + break; + } } - } - // If new server is not part of the existing gluster peers, add into peer group - if (upServer != null) { - List<GlusterServerInfo> glusterServers = getGlusterPeers(upServer.getId()); - Map<String, String> customLogValues = new HashMap<String, String>(); - customLogValues.put("Server", upServer.getHostName()); - if (glusterServers.size() == 0) { - customLogValues.put("Command", "gluster peer status"); - setNonOperational(NonOperationalReason.GLUSTER_COMMAND_FAILED, customLogValues); - return false; - } else if (!hostExists(glusterServers, getVds())) { - if (!glusterPeerProbe(upServer.getId(), getVds().getHostName())) { - customLogValues.put("Command", "gluster peer probe " + getVds().getHostName()); + // If new server is not part of the existing gluster peers, add into peer group + if (upServer != null) { + List<GlusterServerInfo> glusterServers = getGlusterPeers(upServer.getId()); + Map<String, String> customLogValues = new HashMap<String, String>(); + customLogValues.put("Server", upServer.getHostName()); + if (glusterServers.size() == 0) { + customLogValues.put("Command", "gluster peer status"); setNonOperational(NonOperationalReason.GLUSTER_COMMAND_FAILED, customLogValues); return false; + } else if (!hostExists(glusterServers, getVds())) { + if (!glusterPeerProbe(upServer.getId(), getVds().getHostName())) { + customLogValues.put("Command", "gluster peer probe " + getVds().getHostName()); + setNonOperational(NonOperationalReason.GLUSTER_COMMAND_FAILED, customLogValues); + return false; + } } } } + return true; + } finally { + getLockManager().releaseLock(lock); } - return true; } private boolean hostExists(List<GlusterServerInfo> glusterServers, VDS server) { -- To view, visit http://gerrit.ovirt.org/21015 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iaa76cea4c5b5bf7c444680260dd4da5b65665428 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: master Gerrit-Owner: Sahina Bose <sab...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches