Liron Aravot has uploaded a new change for review. Change subject: core: WIP :prevent simultaneous ReconstructsMasterDomain on the same pool(#845838) ......................................................................
core: WIP :prevent simultaneous ReconstructsMasterDomain on the same pool(#845838) https://bugzilla.redhat.com/845838 ReconsturctMasterDomain command can be triggered from the GUI (by executing Reinitialize DC) and by IrsProxyData timer which may cause to two different ReconstructMasterDomain commands run on the same pool simultanously, this patch prevents it by acquiring lock on the IrsProxyData object which represents the pool while executing the ReconstructMasterDomain command. Change-Id: Ia0dfb227b014af602e5bd1ab952d7c543992aa0f Signed-off-by: Liron Aravot <lara...@redhat.com> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/ReconstructMasterDomainCommand.java M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/RecoveryStoragePoolCommand.java M backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java 3 files changed, 145 insertions(+), 77 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/82/7982/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/ReconstructMasterDomainCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/ReconstructMasterDomainCommand.java index 7a7aac1..feef787 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/ReconstructMasterDomainCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/ReconstructMasterDomainCommand.java @@ -33,6 +33,7 @@ import org.ovirt.engine.core.dal.VdcBllMessages; import org.ovirt.engine.core.dal.dbbroker.DbFacade; import org.ovirt.engine.core.utils.transaction.TransactionMethod; +import org.ovirt.engine.core.vdsbroker.irsbroker.IrsBrokerCommand; @SuppressWarnings("serial") @NonTransactiveCommandAttribute(forceCompensation = true) @@ -54,6 +55,23 @@ @Override protected boolean canDoAction() { + boolean toReturn = false; + if (IrsBrokerCommand.tryLockIrsProxyData(getStoragePool().getId())) { + try { + toReturn = performChecks(); + } catch (Exception e) { + IrsBrokerCommand.unLockIrsProxyData(getStoragePool().getId()); + toReturn = false; + } finally { + if (!toReturn) { + IrsBrokerCommand.unLockIrsProxyData(getStoragePool().getId()); + } + } + } + return toReturn; + } + + protected boolean performChecks(){ List<storage_pool_iso_map> poolDomains = DbFacade.getInstance() .getStoragePoolIsoMapDAO().getAllForStoragePool(getStoragePool().getId()); for (storage_pool_iso_map poolDomain : poolDomains) { @@ -173,6 +191,9 @@ new MarkPoolInReconstructModeVDSCommandParameters(getStoragePoolId() .getValue(), ReconstructMarkAction.ClearCache)); } + // if we got here - it mean that the IrsProxyData lock was performed by + // canDoAction() method and that it's still held by this thread. + IrsBrokerCommand.unLockIrsProxyData(getStoragePool().getId()); } protected boolean stopSpm() { diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/RecoveryStoragePoolCommand.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/RecoveryStoragePoolCommand.java index 4f1a0be..a332249 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/RecoveryStoragePoolCommand.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/storage/RecoveryStoragePoolCommand.java @@ -47,8 +47,8 @@ } @Override - protected boolean canDoAction() { - boolean returnValue = super.canDoAction() && checkStoragePool(); + protected boolean performChecks() { + boolean returnValue = super.performChecks() && checkStoragePool(); getReturnValue().getCanDoActionMessages().remove(VdcBllMessages.VAR__ACTION__RECONSTRUCT_MASTER.toString()); addCanDoActionMessage(VdcBllMessages.VAR__ACTION__RECOVER_POOL); diff --git a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java index 3659a3f..050ee7e 100644 --- a/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java +++ b/backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/irsbroker/IrsBrokerCommand.java @@ -1040,7 +1040,7 @@ private final Map<Guid, HashSet<Guid>> _domainsInProblem = new ConcurrentHashMap<Guid, HashSet<Guid>>(); private final Map<Guid, String> _timers = new HashMap<Guid, String>(); private AtomicBoolean duringReconstructMaster = new AtomicBoolean(false); - private final Object _lockObject = new Object(); + private final ReentrantLock _lockObject = new ReentrantLock(); private final ReentrantLock _lockObjForDbSave = new ReentrantLock(); public void lockDbSave() { @@ -1053,87 +1053,102 @@ } } + public void lockIrsProxyLockObject() { + _lockObject.lock(); + } + + public boolean tryLockIrsProxyLockObject() { + return _lockObject.tryLock(); + } + + public void unlockIrsProxyLockObject() { + if (_lockObject.isHeldByCurrentThread()) { + _lockObject.unlock(); + } + } + public void UpdateVdsDomainsData(final Guid vdsId, final String vdsName, final java.util.ArrayList<VDSDomainsData> data) { Set<Guid> domainsInProblems = - (Set<Guid>) TransactionSupport.executeInScope(TransactionScopeOption.Suppress, - new TransactionMethod<Set<Guid>>() { - @Override - public Set<Guid> runInTransaction() { + TransactionSupport.executeInScope(TransactionScopeOption.Suppress, + new TransactionMethod<Set<Guid>>() { + @Override + public Set<Guid> runInTransaction() { - Set<Guid> domainsInProblems = null; - storage_pool storagePool = - DbFacade.getInstance().getStoragePoolDAO().get(_storagePoolId); - if (storagePool != null - && (storagePool.getstatus() == StoragePoolStatus.Up || storagePool.getstatus() == StoragePoolStatus.Problematic)) { + Set<Guid> domainsInProblems = null; + storage_pool storagePool = + DbFacade.getInstance().getStoragePoolDAO().get(_storagePoolId); + if (storagePool != null + && (storagePool.getstatus() == StoragePoolStatus.Up || storagePool.getstatus() == StoragePoolStatus.Problematic)) { - try { + try { - // build a list of all domains in pool - // which are in status Active or Unknown - Set<Guid> domainsInPool = new HashSet<Guid>( - DbFacade.getInstance().getStorageDomainStaticDAO().getAllIds( - _storagePoolId, StorageDomainStatus.Active)); - domainsInPool.addAll(DbFacade.getInstance().getStorageDomainStaticDAO().getAllIds( - _storagePoolId, StorageDomainStatus.Unknown)); + // build a list of all domains in pool + // which are in status Active or Unknown + Set<Guid> domainsInPool = new HashSet<Guid>( + DbFacade.getInstance().getStorageDomainStaticDAO().getAllIds( + _storagePoolId, StorageDomainStatus.Active)); + domainsInPool.addAll(DbFacade.getInstance().getStorageDomainStaticDAO().getAllIds( + _storagePoolId, StorageDomainStatus.Unknown)); - // build a list of all the domains in - // pool (domainsInPool) that are not - // visible by the host. - List<Guid> domainsInPoolThatNonVisibleByVds = new ArrayList<Guid>(); - Set<Guid> dataDomainIds = new HashSet<Guid>(); - for (VDSDomainsData tempData : data) { - dataDomainIds.add(tempData.getDomainId()); - } - for (Guid tempDomainId : domainsInPool) { - if (!dataDomainIds.contains(tempDomainId)) { - domainsInPoolThatNonVisibleByVds.add(tempDomainId); - } - } - - // build a list of domains that the host - // reports as in problem (code!=0) or (code==0 - // && lastChecl > - // ConfigValues.MaxStorageVdsTimeoutCheckSec) - // and are contained in the Active or - // Unknown domains in pool - List<Guid> domainsSeenByVdsInProblem = new ArrayList<Guid>(); - for (VDSDomainsData tempData : data) { - if (domainsInPool.contains(tempData.getDomainId())) { - if (tempData.getCode() != 0) { - domainsSeenByVdsInProblem.add(tempData.getDomainId()); - } else if (tempData.getLastCheck() > Config - .<Double> GetValue(ConfigValues.MaxStorageVdsTimeoutCheckSec)) { - domainsSeenByVdsInProblem.add(tempData.getDomainId()); - } else if (tempData.getDelay() > Config.<Double> GetValue(ConfigValues.MaxStorageVdsDelayCheckSec)) { - AuditLogableBase logable = new AuditLogableBase(); - logable.setVdsId(vdsId); - logable.setStorageDomainId(tempData.getDomainId()); - logable.AddCustomValue("Delay", - Double.toString(tempData.getDelay())); - AuditLogDirector.log(logable, - AuditLogType.VDS_DOMAIN_DELAY_INTERVAL); - } - } - } - - // build a list of all potential domains - // in problem - domainsInProblems = new HashSet<Guid>(); - domainsInProblems.addAll(domainsInPoolThatNonVisibleByVds); - domainsInProblems.addAll(domainsSeenByVdsInProblem); - - } catch (RuntimeException ex) { - log.error("error in UpdateVdsDomainsData", ex); - } - + // build a list of all the domains in + // pool (domainsInPool) that are not + // visible by the host. + List<Guid> domainsInPoolThatNonVisibleByVds = new ArrayList<Guid>(); + Set<Guid> dataDomainIds = new HashSet<Guid>(); + for (VDSDomainsData tempData : data) { + dataDomainIds.add(tempData.getDomainId()); } - return domainsInProblems; + for (Guid tempDomainId : domainsInPool) { + if (!dataDomainIds.contains(tempDomainId)) { + domainsInPoolThatNonVisibleByVds.add(tempDomainId); + } + } + + // build a list of domains that the host + // reports as in problem (code!=0) or (code==0 + // && lastChecl > + // ConfigValues.MaxStorageVdsTimeoutCheckSec) + // and are contained in the Active or + // Unknown domains in pool + List<Guid> domainsSeenByVdsInProblem = new ArrayList<Guid>(); + for (VDSDomainsData tempData : data) { + if (domainsInPool.contains(tempData.getDomainId())) { + if (tempData.getCode() != 0) { + domainsSeenByVdsInProblem.add(tempData.getDomainId()); + } else if (tempData.getLastCheck() > Config + .<Double> GetValue(ConfigValues.MaxStorageVdsTimeoutCheckSec)) { + domainsSeenByVdsInProblem.add(tempData.getDomainId()); + } else if (tempData.getDelay() > Config.<Double> GetValue(ConfigValues.MaxStorageVdsDelayCheckSec)) { + AuditLogableBase logable = new AuditLogableBase(); + logable.setVdsId(vdsId); + logable.setStorageDomainId(tempData.getDomainId()); + logable.AddCustomValue("Delay", + Double.toString(tempData.getDelay())); + AuditLogDirector.log(logable, + AuditLogType.VDS_DOMAIN_DELAY_INTERVAL); + } + } + } + + // build a list of all potential domains + // in problem + domainsInProblems = new HashSet<Guid>(); + domainsInProblems.addAll(domainsInPoolThatNonVisibleByVds); + domainsInProblems.addAll(domainsSeenByVdsInProblem); + + } catch (RuntimeException ex) { + log.error("error in UpdateVdsDomainsData", ex); } - }); + + } + return domainsInProblems; + } + }); if (domainsInProblems != null) { - synchronized (_lockObject) { + lockIrsProxyLockObject(); + try { // during reconstruct master we do not want to update // domains status if (duringReconstructMaster.get()) { @@ -1143,6 +1158,8 @@ } updateProblematicVdsData(vdsId, vdsName, domainsInProblems); + } finally { + unlockIrsProxyLockObject(); } } } @@ -1209,12 +1226,15 @@ @OnTimerMethodAnnotation("OnTimer") public void OnTimer(Guid domainId) { - synchronized (_lockObject) { + lockIrsProxyLockObject(); + try { if (_domainsInProblem.containsKey(domainId)) { log.info("starting ProcessDomainRecovery for domain " + domainId); ProcessDomainRecovery(domainId); } _timers.remove(domainId); + } finally { + unlockIrsProxyLockObject(); } } @@ -1377,18 +1397,22 @@ * deletes all the jobs for the domains in the pool and clears the problematic entities caches. */ public void clearCache() { - synchronized (_lockObject) { + lockIrsProxyLockObject(); + try { log.info("clearing cache for problematic entities in pool " + _storagePoolId); // clear lists _timers.clear(); _domainsInProblem.clear(); _vdssInProblem.clear(); duringReconstructMaster.set(false); + } finally { + unlockIrsProxyLockObject(); } } public void clearPoolTimers() { - synchronized (_lockObject) { + lockIrsProxyLockObject(); + try { log.info("clear domain error-timers for pool " + _storagePoolId); duringReconstructMaster.set(true); for (String jobId : _timers.values()) { @@ -1398,6 +1422,8 @@ log.warn("failed deleting job " + jobId); } } + } finally { + unlockIrsProxyLockObject(); } } @@ -1417,12 +1443,15 @@ log.infoFormat("Clearing cache of pool: {0} for problematic entities of VDS: {1}.", _storagePoolId, vdsName); - synchronized (_lockObject) { + lockIrsProxyLockObject(); + try { if (_vdssInProblem.containsKey(vdsId)) { for (Guid domainId : new HashSet<Guid>(_vdssInProblem.get(vdsId))) { DomainRecoveredFromProblem(domainId, vdsId, vdsName); } } + } finally { + unlockIrsProxyLockObject(); } } @@ -1469,6 +1498,24 @@ } } + public static boolean tryLockIrsProxyData(Guid storagePoolId) { + // implemented this way to prevent NPE in case that the pool is removed from the map. + // TODO: prevent such a case when running DestoryStoragePool + try { + return _irsProxyData.get(storagePoolId).tryLockIrsProxyLockObject(); + } catch (Exception e) { + return false; + } + } + + public static void unLockIrsProxyData(Guid storagePoolId) { + // implemented this way to prevent NPE in case that the pool is removed from the map. + IrsProxyData proxyData = _irsProxyData.get(storagePoolId); + if (proxyData != null) { + proxyData.unlockIrsProxyLockObject(); + } + } + /** * Return the IRS Proxy object for the given pool id. If there's no proxy data available, since there's no SPM * for the pool, then returns <code>null</code>. -- To view, visit http://gerrit.ovirt.org/7982 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ia0dfb227b014af602e5bd1ab952d7c543992aa0f Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: master Gerrit-Owner: Liron Aravot <lara...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches