Hello Kanagaraj M, I'd like you to do a code review. Please visit
http://gerrit.ovirt.org/27838 to review the following change. Change subject: gluster: check gluster daemon status in sync-job ...................................................................... gluster: check gluster daemon status in sync-job In sync-job, peer list will be retrieved always from the first UP server. There are chances that gluster daemon in other servers may be down. In this case, peer status for those servers will be DISCONNECTED. When a host is found in UP state in Database and peer status is 'DISCONNECTED', that host will be moved to Non-Operational status. 'peer status' command will be executed on that server to make sure the gluster daemon is actually down before moving to Non-Operational. Change-Id: I3e0f661cb496e741f7c06df42ce3b55037a87e28 Bug-Url: https://bugzilla.redhat.com/1056997 Bug-Url: https://bugzilla.redhat.com/1097736 Signed-off-by: Kanagaraj M <kmayi...@redhat.com> Signed-off-by: Sahina Bose <sab...@redhat.com> --- M backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/gluster/GlusterSyncJob.java 1 file changed, 34 insertions(+), 20 deletions(-) git pull ssh://gerrit.ovirt.org:29418/ovirt-engine refs/changes/38/27838/1 diff --git a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/gluster/GlusterSyncJob.java b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/gluster/GlusterSyncJob.java index 6bcf4da..321a0d4 100644 --- a/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/gluster/GlusterSyncJob.java +++ b/backend/manager/modules/bll/src/main/java/org/ovirt/engine/core/bll/gluster/GlusterSyncJob.java @@ -27,6 +27,7 @@ import org.ovirt.engine.core.common.businessentities.gluster.GlusterVolumeAdvancedDetails; import org.ovirt.engine.core.common.businessentities.gluster.GlusterVolumeEntity; import org.ovirt.engine.core.common.businessentities.gluster.GlusterVolumeOptionEntity; +import org.ovirt.engine.core.common.businessentities.gluster.PeerStatus; import 
org.ovirt.engine.core.common.businessentities.gluster.TransportType; import org.ovirt.engine.core.common.businessentities.network.VdsNetworkInterface; import org.ovirt.engine.core.common.constants.gluster.GlusterConstants; @@ -36,6 +37,7 @@ import org.ovirt.engine.core.common.vdscommands.RemoveVdsVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; import org.ovirt.engine.core.common.vdscommands.VDSReturnValue; +import org.ovirt.engine.core.common.vdscommands.VdsIdVDSCommandParametersBase; import org.ovirt.engine.core.common.vdscommands.gluster.GlusterVolumeAdvancedDetailsVDSParameters; import org.ovirt.engine.core.common.vdscommands.gluster.GlusterVolumesListVDSParameters; import org.ovirt.engine.core.compat.Guid; @@ -133,7 +135,7 @@ try { List<GlusterServerInfo> fetchedServers = fetchServers(cluster, upServer, existingServers); if (fetchedServers != null) { - removeDetachedServers(existingServers, fetchedServers); + syncServers(existingServers, fetchedServers); } } catch(Exception e) { log.errorFormat("Error while refreshing server data for cluster {0} from database!", cluster.getName(), e); @@ -142,22 +144,35 @@ } } - private void removeDetachedServers(List<VDS> existingServers, List<GlusterServerInfo> fetchedServers) { + private void syncServers(List<VDS> existingServers, List<GlusterServerInfo> fetchedServers) { log.debugFormat("Existing servers list returned {0} comparing with fetched servers {1)", existingServers, fetchedServers); boolean serverRemoved = false; for (VDS server : existingServers) { - if (isRemovableStatus(server.getStatus()) && serverDetached(server, fetchedServers)) { - log.infoFormat("Server {0} has been removed directly using the gluster CLI. 
Removing it from engine as well.", - server.getName()); - logUtil.logServerMessage(server, AuditLogType.GLUSTER_SERVER_REMOVED_FROM_CLI); - try (EngineLock lock = getGlusterUtil().acquireGlusterLockWait(server.getId())) { - removeServerFromDb(server); - // remove the server from resource manager - runVdsCommand(VDSCommandType.RemoveVds, new RemoveVdsVDSCommandParameters(server.getId())); - serverRemoved = true; - } catch (Exception e) { - log.errorFormat("Error while removing server {0} from database!", server.getName(), e); + + if (isRemovableStatus(server.getStatus())) { + GlusterServerInfo glusterServer = findGlusterServer(server, fetchedServers); + if (glusterServer == null) { + log.infoFormat("Server {0} has been removed directly using the gluster CLI. Removing it from engine as well.", + server.getName()); + logUtil.logServerMessage(server, AuditLogType.GLUSTER_SERVER_REMOVED_FROM_CLI); + try (EngineLock lock = getGlusterUtil().acquireGlusterLockWait(server.getId())) { + removeServerFromDb(server); + // remove the server from resource manager + runVdsCommand(VDSCommandType.RemoveVds, new RemoveVdsVDSCommandParameters(server.getId())); + serverRemoved = true; + } catch (Exception e) { + log.errorFormat("Error while removing server {0} from database!", server.getName(), e); + } + } + else if (server.getStatus() == VDSStatus.Up && glusterServer.getStatus() == PeerStatus.DISCONNECTED) { + // check gluster is running, if down then move the host to Non-Operational + VDSReturnValue returnValue = + runVdsCommand(VDSCommandType.GlusterServersList, + new VdsIdVDSCommandParametersBase(server.getId())); + if (!returnValue.getSucceeded()) { + setNonOperational(server); + } } } } @@ -201,32 +216,31 @@ } /** - * Returns true if the given server has been detached i.e. cannot be found in the list of fetched servers. + * Returns the equivalent GlusterServer from the list of fetched servers. 
* * @param server * @param fetchedServers - * @return + * @return GlusterServerInfo */ - private boolean serverDetached(VDS server, List<GlusterServerInfo> fetchedServers) { + private GlusterServerInfo findGlusterServer(VDS server, List<GlusterServerInfo> fetchedServers) { if (GlusterFeatureSupported.glusterHostUuidSupported(server.getVdsGroupCompatibilityVersion())) { // compare gluster host uuid stored in server with the ones fetched from list GlusterServer glusterServer = getGlusterServerDao().getByServerId(server.getId()); for (GlusterServerInfo fetchedServer : fetchedServers) { if (fetchedServer.getUuid().equals(glusterServer.getGlusterServerUuid())) { - return false; + return fetchedServer; } } - return true; } else { List<String> vdsIps = getVdsIps(server); for (GlusterServerInfo fetchedServer : fetchedServers) { if (fetchedServer.getHostnameOrIp().equals(server.getHostName()) || vdsIps.contains(fetchedServer.getHostnameOrIp())) { - return false; + return fetchedServer; } } - return true; } + return null; } private List<String> getVdsIps(VDS vds) { -- To view, visit http://gerrit.ovirt.org/27838 To unsubscribe, visit http://gerrit.ovirt.org/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I3e0f661cb496e741f7c06df42ce3b55037a87e28 Gerrit-PatchSet: 1 Gerrit-Project: ovirt-engine Gerrit-Branch: ovirt-engine-3.4 Gerrit-Owner: Sahina Bose <sab...@redhat.com> Gerrit-Reviewer: Kanagaraj M <kmayi...@redhat.com> _______________________________________________ Engine-patches mailing list Engine-patches@ovirt.org http://lists.ovirt.org/mailman/listinfo/engine-patches