Liron Aravot has uploaded a new change for review.

Change subject: core: WIP :prevent simultaneous ReconstructsMasterDomain on the 
same pool(#845838)
......................................................................

core: WIP :prevent simultaneous ReconstructsMasterDomain on the same
pool(#845838)

https://bugzilla.redhat.com/845838

ReconsturctMasterDomain command can be triggered from the GUI (by
executing Reinitialize DC) and by IrsProxyData timer which may cause
to two different ReconstructMasterDomain commands run on the same pool
simultanously, this patch prevents it by acquiring lock on the
IrsProxyData object which represents the pool while executing the
ReconstructMasterDomain command.

Change-Id: Ia0dfb227b014af602e5bd1ab952d7c543992aa0f
Signed-off-by: Liron Aravot <lara...@redhat.com>
---
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/ReconstructMasterDomainCommand.java
M 
backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/RecoveryStoragePoolCommand.java
M 
backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
3 files changed, 145 insertions(+), 77 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/82/7982/1

diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/ReconstructMasterDomainCommand.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/ReconstructMasterDomainCommand.java
index 7a7aac1..feef787 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/ReconstructMasterDomainCommand.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/ReconstructMasterDomainCommand.java
@@ -33,6 +33,7 @@
 import org.ovirt.engine.core.dal.VdcBllMessages;
 import org.ovirt.engine.core.dal.dbbroker.DbFacade;
 import org.ovirt.engine.core.utils.transaction.TransactionMethod;
+import org.ovirt.engine.core.vdsbroker.irsbroker.IrsBrokerCommand;
 
 @SuppressWarnings("serial")
 @NonTransactiveCommandAttribute(forceCompensation = true)
@@ -54,6 +55,23 @@
 
     @Override
     protected boolean canDoAction() {
+        boolean toReturn = false;
+        if (IrsBrokerCommand.tryLockIrsProxyData(getStoragePool().getId())) {
+            try {
+                toReturn = performChecks();
+            } catch (Exception e) {
+                IrsBrokerCommand.unLockIrsProxyData(getStoragePool().getId());
+                toReturn = false;
+            } finally {
+                if (!toReturn) {
+                    
IrsBrokerCommand.unLockIrsProxyData(getStoragePool().getId());
+                }
+            }
+        }
+        return toReturn;
+    }
+
+    protected boolean performChecks(){
         List<storage_pool_iso_map> poolDomains = DbFacade.getInstance()
                 
.getStoragePoolIsoMapDAO().getAllForStoragePool(getStoragePool().getId());
         for (storage_pool_iso_map poolDomain : poolDomains) {
@@ -173,6 +191,9 @@
                             new 
MarkPoolInReconstructModeVDSCommandParameters(getStoragePoolId()
                                     .getValue(), 
ReconstructMarkAction.ClearCache));
         }
+        // if we got here - it mean that the IrsProxyData lock was performed by
+        // canDoAction() method and that it's still held by this thread.
+        IrsBrokerCommand.unLockIrsProxyData(getStoragePool().getId());
     }
 
     protected boolean stopSpm() {
diff --git 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/RecoveryStoragePoolCommand.java
 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/RecoveryStoragePoolCommand.java
index 4f1a0be..a332249 100644
--- 
a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/RecoveryStoragePoolCommand.java
+++ 
b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/RecoveryStoragePoolCommand.java
@@ -47,8 +47,8 @@
     }
 
     @Override
-    protected boolean canDoAction() {
-        boolean returnValue = super.canDoAction() && checkStoragePool();
+    protected boolean performChecks() {
+        boolean returnValue = super.performChecks() && checkStoragePool();
         
getReturnValue().getCanDoActionMessages().remove(VdcBllMessages.VAR__ACTION__RECONSTRUCT_MASTER.toString());
 
         addCanDoActionMessage(VdcBllMessages.VAR__ACTION__RECOVER_POOL);
diff --git 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
index 3659a3f..050ee7e 100644
--- 
a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
+++ 
b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java
@@ -1040,7 +1040,7 @@
         private final Map<Guid, HashSet<Guid>> _domainsInProblem = new 
ConcurrentHashMap<Guid, HashSet<Guid>>();
         private final Map<Guid, String> _timers = new HashMap<Guid, String>();
         private AtomicBoolean duringReconstructMaster = new 
AtomicBoolean(false);
-        private final Object _lockObject = new Object();
+        private final ReentrantLock _lockObject = new ReentrantLock();
         private final ReentrantLock _lockObjForDbSave = new ReentrantLock();
 
         public void lockDbSave() {
@@ -1053,87 +1053,102 @@
             }
         }
 
+        public void lockIrsProxyLockObject() {
+            _lockObject.lock();
+        }
+
+        public boolean tryLockIrsProxyLockObject() {
+            return _lockObject.tryLock();
+        }
+
+        public void unlockIrsProxyLockObject() {
+            if (_lockObject.isHeldByCurrentThread()) {
+                _lockObject.unlock();
+            }
+        }
+
         public void UpdateVdsDomainsData(final Guid vdsId, final String 
vdsName,
                 final java.util.ArrayList<VDSDomainsData> data) {
 
             Set<Guid> domainsInProblems =
-                    (Set<Guid>) 
TransactionSupport.executeInScope(TransactionScopeOption.Suppress,
-                            new TransactionMethod<Set<Guid>>() {
-                                @Override
-                                public Set<Guid> runInTransaction() {
+                    
TransactionSupport.executeInScope(TransactionScopeOption.Suppress,
+                    new TransactionMethod<Set<Guid>>() {
+                        @Override
+                        public Set<Guid> runInTransaction() {
 
-                                    Set<Guid> domainsInProblems = null;
-                                    storage_pool storagePool =
-                                            
DbFacade.getInstance().getStoragePoolDAO().get(_storagePoolId);
-                                    if (storagePool != null
-                                            && (storagePool.getstatus() == 
StoragePoolStatus.Up || storagePool.getstatus() == 
StoragePoolStatus.Problematic)) {
+                            Set<Guid> domainsInProblems = null;
+                            storage_pool storagePool =
+                                    
DbFacade.getInstance().getStoragePoolDAO().get(_storagePoolId);
+                            if (storagePool != null
+                                    && (storagePool.getstatus() == 
StoragePoolStatus.Up || storagePool.getstatus() == 
StoragePoolStatus.Problematic)) {
 
-                                        try {
+                                try {
 
-                                            // build a list of all domains in 
pool
-                                            // which are in status Active or 
Unknown
-                                            Set<Guid> domainsInPool = new 
HashSet<Guid>(
-                                                    
DbFacade.getInstance().getStorageDomainStaticDAO().getAllIds(
-                                                            _storagePoolId, 
StorageDomainStatus.Active));
-                                            
domainsInPool.addAll(DbFacade.getInstance().getStorageDomainStaticDAO().getAllIds(
-                                                    _storagePoolId, 
StorageDomainStatus.Unknown));
+                                    // build a list of all domains in pool
+                                    // which are in status Active or Unknown
+                                    Set<Guid> domainsInPool = new 
HashSet<Guid>(
+                                            
DbFacade.getInstance().getStorageDomainStaticDAO().getAllIds(
+                                                    _storagePoolId, 
StorageDomainStatus.Active));
+                                    
domainsInPool.addAll(DbFacade.getInstance().getStorageDomainStaticDAO().getAllIds(
+                                            _storagePoolId, 
StorageDomainStatus.Unknown));
 
-                                            // build a list of all the domains 
in
-                                            // pool (domainsInPool) that are 
not
-                                            // visible by the host.
-                                            List<Guid> 
domainsInPoolThatNonVisibleByVds = new ArrayList<Guid>();
-                                            Set<Guid> dataDomainIds = new 
HashSet<Guid>();
-                                            for (VDSDomainsData tempData : 
data) {
-                                                
dataDomainIds.add(tempData.getDomainId());
-                                            }
-                                            for (Guid tempDomainId : 
domainsInPool) {
-                                                if 
(!dataDomainIds.contains(tempDomainId)) {
-                                                    
domainsInPoolThatNonVisibleByVds.add(tempDomainId);
-                                                }
-                                            }
-
-                                            // build a list of domains that 
the host
-                                            // reports as in problem (code!=0) 
or (code==0
-                                            // && lastChecl >
-                                            // 
ConfigValues.MaxStorageVdsTimeoutCheckSec)
-                                            // and are contained in the Active 
or
-                                            // Unknown domains in pool
-                                            List<Guid> 
domainsSeenByVdsInProblem = new ArrayList<Guid>();
-                                            for (VDSDomainsData tempData : 
data) {
-                                                if 
(domainsInPool.contains(tempData.getDomainId())) {
-                                                    if (tempData.getCode() != 
0) {
-                                                        
domainsSeenByVdsInProblem.add(tempData.getDomainId());
-                                                    } else if 
(tempData.getLastCheck() > Config
-                                                            .<Double> 
GetValue(ConfigValues.MaxStorageVdsTimeoutCheckSec)) {
-                                                        
domainsSeenByVdsInProblem.add(tempData.getDomainId());
-                                                    } else if 
(tempData.getDelay() > Config.<Double> 
GetValue(ConfigValues.MaxStorageVdsDelayCheckSec)) {
-                                                        AuditLogableBase 
logable = new AuditLogableBase();
-                                                        
logable.setVdsId(vdsId);
-                                                        
logable.setStorageDomainId(tempData.getDomainId());
-                                                        
logable.AddCustomValue("Delay",
-                                                                
Double.toString(tempData.getDelay()));
-                                                        
AuditLogDirector.log(logable,
-                                                                
AuditLogType.VDS_DOMAIN_DELAY_INTERVAL);
-                                                    }
-                                                }
-                                            }
-
-                                            // build a list of all potential 
domains
-                                            // in problem
-                                            domainsInProblems = new 
HashSet<Guid>();
-                                            
domainsInProblems.addAll(domainsInPoolThatNonVisibleByVds);
-                                            
domainsInProblems.addAll(domainsSeenByVdsInProblem);
-
-                                        } catch (RuntimeException ex) {
-                                            log.error("error in 
UpdateVdsDomainsData", ex);
-                                        }
-
+                                    // build a list of all the domains in
+                                    // pool (domainsInPool) that are not
+                                    // visible by the host.
+                                    List<Guid> 
domainsInPoolThatNonVisibleByVds = new ArrayList<Guid>();
+                                    Set<Guid> dataDomainIds = new 
HashSet<Guid>();
+                                    for (VDSDomainsData tempData : data) {
+                                        
dataDomainIds.add(tempData.getDomainId());
                                     }
-                                    return domainsInProblems;
+                                    for (Guid tempDomainId : domainsInPool) {
+                                        if 
(!dataDomainIds.contains(tempDomainId)) {
+                                            
domainsInPoolThatNonVisibleByVds.add(tempDomainId);
+                                        }
+                                    }
+
+                                    // build a list of domains that the host
+                                    // reports as in problem (code!=0) or 
(code==0
+                                    // && lastChecl >
+                                    // 
ConfigValues.MaxStorageVdsTimeoutCheckSec)
+                                    // and are contained in the Active or
+                                    // Unknown domains in pool
+                                    List<Guid> domainsSeenByVdsInProblem = new 
ArrayList<Guid>();
+                                    for (VDSDomainsData tempData : data) {
+                                        if 
(domainsInPool.contains(tempData.getDomainId())) {
+                                            if (tempData.getCode() != 0) {
+                                                
domainsSeenByVdsInProblem.add(tempData.getDomainId());
+                                            } else if (tempData.getLastCheck() 
> Config
+                                                    .<Double> 
GetValue(ConfigValues.MaxStorageVdsTimeoutCheckSec)) {
+                                                
domainsSeenByVdsInProblem.add(tempData.getDomainId());
+                                            } else if (tempData.getDelay() > 
Config.<Double> GetValue(ConfigValues.MaxStorageVdsDelayCheckSec)) {
+                                                AuditLogableBase logable = new 
AuditLogableBase();
+                                                logable.setVdsId(vdsId);
+                                                
logable.setStorageDomainId(tempData.getDomainId());
+                                                logable.AddCustomValue("Delay",
+                                                        
Double.toString(tempData.getDelay()));
+                                                AuditLogDirector.log(logable,
+                                                        
AuditLogType.VDS_DOMAIN_DELAY_INTERVAL);
+                                            }
+                                        }
+                                    }
+
+                                    // build a list of all potential domains
+                                    // in problem
+                                    domainsInProblems = new HashSet<Guid>();
+                                    
domainsInProblems.addAll(domainsInPoolThatNonVisibleByVds);
+                                    
domainsInProblems.addAll(domainsSeenByVdsInProblem);
+
+                                } catch (RuntimeException ex) {
+                                    log.error("error in UpdateVdsDomainsData", 
ex);
                                 }
-                            });
+
+                            }
+                            return domainsInProblems;
+                        }
+                    });
             if (domainsInProblems != null) {
-                synchronized (_lockObject) {
+                lockIrsProxyLockObject();
+                try {
                     // during reconstruct master we do not want to update
                     // domains status
                     if (duringReconstructMaster.get()) {
@@ -1143,6 +1158,8 @@
                     }
 
                     updateProblematicVdsData(vdsId, vdsName, 
domainsInProblems);
+                } finally {
+                    unlockIrsProxyLockObject();
                 }
             }
         }
@@ -1209,12 +1226,15 @@
 
         @OnTimerMethodAnnotation("OnTimer")
         public void OnTimer(Guid domainId) {
-            synchronized (_lockObject) {
+            lockIrsProxyLockObject();
+            try {
                 if (_domainsInProblem.containsKey(domainId)) {
                     log.info("starting ProcessDomainRecovery for domain " + 
domainId);
                     ProcessDomainRecovery(domainId);
                 }
                 _timers.remove(domainId);
+            } finally {
+                unlockIrsProxyLockObject();
             }
         }
 
@@ -1377,18 +1397,22 @@
          * deletes all the jobs for the domains in the pool and clears the 
problematic entities caches.
          */
         public void clearCache() {
-            synchronized (_lockObject) {
+            lockIrsProxyLockObject();
+            try {
                 log.info("clearing cache for problematic entities in pool " + 
_storagePoolId);
                 // clear lists
                 _timers.clear();
                 _domainsInProblem.clear();
                 _vdssInProblem.clear();
                 duringReconstructMaster.set(false);
+            } finally {
+                unlockIrsProxyLockObject();
             }
         }
 
         public void clearPoolTimers() {
-            synchronized (_lockObject) {
+            lockIrsProxyLockObject();
+            try {
                 log.info("clear domain error-timers for pool " + 
_storagePoolId);
                 duringReconstructMaster.set(true);
                 for (String jobId : _timers.values()) {
@@ -1398,6 +1422,8 @@
                         log.warn("failed deleting job " + jobId);
                     }
                 }
+            } finally {
+                unlockIrsProxyLockObject();
             }
         }
 
@@ -1417,12 +1443,15 @@
             log.infoFormat("Clearing cache of pool: {0} for problematic 
entities of VDS: {1}.",
                     _storagePoolId, vdsName);
 
-            synchronized (_lockObject) {
+            lockIrsProxyLockObject();
+            try {
                 if (_vdssInProblem.containsKey(vdsId)) {
                     for (Guid domainId : new 
HashSet<Guid>(_vdssInProblem.get(vdsId))) {
                         DomainRecoveredFromProblem(domainId, vdsId, vdsName);
                     }
                 }
+            } finally {
+                unlockIrsProxyLockObject();
             }
         }
 
@@ -1469,6 +1498,24 @@
         }
     }
 
+    public static boolean tryLockIrsProxyData(Guid storagePoolId) {
+        // implemented this way to prevent NPE in case that the pool is 
removed from the map.
+        // TODO: prevent such a case when running DestoryStoragePool
+        try {
+            return 
_irsProxyData.get(storagePoolId).tryLockIrsProxyLockObject();
+        } catch (Exception e) {
+            return false;
+        }
+    }
+
+    public static void unLockIrsProxyData(Guid storagePoolId) {
+        // implemented this way to prevent NPE in case that the pool is 
removed from the map.
+        IrsProxyData proxyData = _irsProxyData.get(storagePoolId);
+        if (proxyData != null) {
+            proxyData.unlockIrsProxyLockObject();
+        }
+    }
+
     /**
      * Return the IRS Proxy object for the given pool id. If there's no proxy 
data available, since there's no SPM
      * for the pool, then returns <code>null</code>.


--
To view, visit http://gerrit.ovirt.org/7982
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia0dfb227b014af602e5bd1ab952d7c543992aa0f
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine
Gerrit-Branch: master
Gerrit-Owner: Liron Aravot <lara...@redhat.com>
_______________________________________________
Engine-patches mailing list
Engine-patches@ovirt.org
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to