Sahina Bose has uploaded a new change for review.

Change subject: engine: Avoid race condition between gluster peer probe and status
......................................................................

engine: Avoid race condition between gluster peer probe and status

During addition of a gluster host, a gluster peer probe command is executed.
If a gluster peer status command is fired at the same time from GlusterSyncJob,
the newly added host is removed from the engine database, because gluster peer
status has not yet recognized the newly added host.

Added a lock at the cluster level before the peer probe to avoid this.
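
The guarded section follows the engine's exclusive-lock pattern, keyed by the
cluster (VDS group) id. A minimal sketch of that pattern (names are taken from
the diff below; clusterId stands in for getVds().getVdsGroupId()):

    Map<String, Pair<String, String>> exclusiveLocks = new HashMap<String, Pair<String, String>>();
    exclusiveLocks.put(clusterId.toString(),
            LockMessagesMatchUtil.makeLockingPair(LockingGroup.GLUSTER,
                    VdcBllMessages.ACTION_TYPE_FAILED_GLUSTER_OPERATION_INPROGRESS));
    EngineLock lock = new EngineLock(exclusiveLocks, null);
    // acquireLockWait blocks until no other gluster operation holds the cluster lock
    getLockManager().acquireLockWait(lock);
    try {
        // "gluster peer status" / "gluster peer probe" run here, serialized per cluster
    } finally {
        getLockManager().releaseLock(lock); // release even if the probe fails
    }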

Change-Id: Iaa76cea4c5b5bf7c444680260dd4da5b65665428
Bug-Url: https://bugzilla.redhat.com/972619
Signed-off-by: Sahina Bose <sab...@redhat.com>
---
M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java
1 file changed, 41 insertions(+), 24 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/15/21015/1

diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java
index f9435a9..c1ea010 100644
--- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java
+++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/InitVdsOnUpCommand.java
@@ -35,11 +35,14 @@
 import org.ovirt.engine.core.common.businessentities.gluster.GlusterServerInfo;
 import org.ovirt.engine.core.common.businessentities.network.VdsNetworkInterface;
 import org.ovirt.engine.core.common.errors.VdcBLLException;
+import org.ovirt.engine.core.common.errors.VdcBllMessages;
 import org.ovirt.engine.core.common.eventqueue.Event;
 import org.ovirt.engine.core.common.eventqueue.EventQueue;
 import org.ovirt.engine.core.common.eventqueue.EventResult;
 import org.ovirt.engine.core.common.eventqueue.EventType;
 import org.ovirt.engine.core.common.gluster.GlusterFeatureSupported;
+import org.ovirt.engine.core.common.locks.LockingGroup;
+import org.ovirt.engine.core.common.utils.Pair;
 import org.ovirt.engine.core.common.vdscommands.ConnectStoragePoolVDSCommandParameters;
 import org.ovirt.engine.core.common.vdscommands.MomPolicyVDSParameters;
 import org.ovirt.engine.core.common.vdscommands.VDSCommandType;
@@ -58,6 +61,7 @@
 import org.ovirt.engine.core.utils.ejb.BeanProxyType;
 import org.ovirt.engine.core.utils.ejb.BeanType;
 import org.ovirt.engine.core.utils.ejb.EjbUtils;
+import org.ovirt.engine.core.utils.lock.EngineLock;
 import org.ovirt.engine.core.vdsbroker.attestation.AttestationService;
 import org.ovirt.engine.core.vdsbroker.attestation.AttestationValue;
 import org.ovirt.engine.core.vdsbroker.irsbroker.IrsBrokerCommand;
@@ -342,38 +346,51 @@
     }
 
     private boolean initGlusterPeerProcess() {
-        glusterPeerListSucceeded = true;
-        glusterPeerProbeSucceeded = true;
-        List<VDS> vdsList = getVdsDAO().getAllForVdsGroupWithStatus(getVdsGroupId(), VDSStatus.Up);
-        // If the cluster already having Gluster servers, get an up server
-        if (vdsList != null && vdsList.size() > 0) {
-            VDS upServer = null;
-            for (VDS vds : vdsList) {
-                if (!getVdsId().equals(vds.getId())) {
-                    upServer = vds;
-                    break;
+        /* Acquire a wait lock only for the duration of the gluster peer process.
+           If "gluster peer probe" and "gluster peer status" are executed simultaneously, the results
+           are unpredictable. Hence, lock the cluster to ensure the sync job does not lead to a race
+           condition. */
+        Map<String, Pair<String, String>> exclusiveLocks = new HashMap<String, Pair<String, String>>();
+        exclusiveLocks.put(getVds().getVdsGroupId().toString(),
+                LockMessagesMatchUtil.makeLockingPair(LockingGroup.GLUSTER, VdcBllMessages.ACTION_TYPE_FAILED_GLUSTER_OPERATION_INPROGRESS));
+        EngineLock lock = new EngineLock(exclusiveLocks, null);
+        getLockManager().acquireLockWait(lock);
+        try {
+            glusterPeerListSucceeded = true;
+            glusterPeerProbeSucceeded = true;
+            List<VDS> vdsList = getVdsDAO().getAllForVdsGroupWithStatus(getVdsGroupId(), VDSStatus.Up);
+            // If the cluster already has Gluster servers, get an up server
+            if (vdsList != null && vdsList.size() > 0) {
+                VDS upServer = null;
+                for (VDS vds : vdsList) {
+                    if (!getVdsId().equals(vds.getId())) {
+                        upServer = vds;
+                        break;
+                    }
                 }
-            }
 
-            // If new server is not part of the existing gluster peers, add into peer group
-            if (upServer != null) {
-                List<GlusterServerInfo> glusterServers = getGlusterPeers(upServer.getId());
-                Map<String, String> customLogValues = new HashMap<String, String>();
-                customLogValues.put("Server", upServer.getHostName());
-                if (glusterServers.size() == 0) {
-                    customLogValues.put("Command", "gluster peer status");
-                    setNonOperational(NonOperationalReason.GLUSTER_COMMAND_FAILED, customLogValues);
-                    return false;
-                } else if (!hostExists(glusterServers, getVds())) {
-                    if (!glusterPeerProbe(upServer.getId(), getVds().getHostName())) {
-                        customLogValues.put("Command", "gluster peer probe " + getVds().getHostName());
+                // If the new server is not part of the existing gluster peers, add it into the peer group
+                if (upServer != null) {
+                    List<GlusterServerInfo> glusterServers = getGlusterPeers(upServer.getId());
+                    Map<String, String> customLogValues = new HashMap<String, String>();
+                    customLogValues.put("Server", upServer.getHostName());
+                    if (glusterServers.size() == 0) {
+                        customLogValues.put("Command", "gluster peer status");
                         setNonOperational(NonOperationalReason.GLUSTER_COMMAND_FAILED, customLogValues);
                         return false;
+                    } else if (!hostExists(glusterServers, getVds())) {
+                        if (!glusterPeerProbe(upServer.getId(), getVds().getHostName())) {
+                            customLogValues.put("Command", "gluster peer probe " + getVds().getHostName());
+                            setNonOperational(NonOperationalReason.GLUSTER_COMMAND_FAILED, customLogValues);
+                            return false;
+                        }
                     }
                 }
             }
+            return true;
+        } finally {
+            getLockManager().releaseLock(lock);
         }
-        return true;
     }
 
     private boolean hostExists(List<GlusterServerInfo> glusterServers, VDS server) {
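
For the lock to actually close the race, GlusterSyncJob has to contend on the
same key before running "gluster peer status". A hypothetical sketch of that
side (not part of this patch; the Pair-returning acquireLock signature and the
refreshGlusterPeers helper are assumptions):

    // Skip the sync cycle instead of blocking if a peer probe holds the cluster lock.
    Map<String, Pair<String, String>> locks = Collections.singletonMap(cluster.getId().toString(),
            LockMessagesMatchUtil.makeLockingPair(LockingGroup.GLUSTER,
                    VdcBllMessages.ACTION_TYPE_FAILED_GLUSTER_OPERATION_INPROGRESS));
    EngineLock lock = new EngineLock(locks, null);
    if (!getLockManager().acquireLock(lock).getFirst()) {
        return; // a peer probe is in progress; retry on the next sync cycle
    }
    try {
        refreshGlusterPeers(cluster); // safe to run "gluster peer status" now
    } finally {
        getLockManager().releaseLock(lock);
    }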


-- 
To view, visit http://gerrit.ovirt.org/21015
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iaa76cea4c5b5bf7c444680260dd4da5b65665428
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: master
Gerrit-Owner: Sahina Bose <sab...@redhat.com>