Repository: incubator-ignite Updated Branches: refs/heads/ignite-752 10a3ef34a -> f714fd0a0
ignite-752: improved failure detection, fixed comments Project: http://git-wip-us.apache.org/repos/asf/incubator-ignite/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-ignite/commit/f714fd0a Tree: http://git-wip-us.apache.org/repos/asf/incubator-ignite/tree/f714fd0a Diff: http://git-wip-us.apache.org/repos/asf/incubator-ignite/diff/f714fd0a Branch: refs/heads/ignite-752 Commit: f714fd0a0320fcb4cb51c5baba9344f7f86cf75a Parents: 10a3ef3 Author: Denis Magda <dma...@gridgain.com> Authored: Thu Jul 23 13:14:24 2015 +0300 Committer: Denis Magda <dma...@gridgain.com> Committed: Thu Jul 23 13:14:24 2015 +0300 ---------------------------------------------------------------------- .../configuration/IgniteConfiguration.java | 7 ++- .../ignite/spi/discovery/tcp/ServerImpl.java | 49 ++++++-------------- 2 files changed, 18 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/f714fd0a/modules/core/src/main/java/org/apache/ignite/configuration/IgniteConfiguration.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/configuration/IgniteConfiguration.java b/modules/core/src/main/java/org/apache/ignite/configuration/IgniteConfiguration.java index 084d22c..f4fc4ec 100644 --- a/modules/core/src/main/java/org/apache/ignite/configuration/IgniteConfiguration.java +++ b/modules/core/src/main/java/org/apache/ignite/configuration/IgniteConfiguration.java @@ -1676,8 +1676,11 @@ public class IgniteConfiguration { /** * Sets failure detection threshold to use in {@link TcpDiscoverySpi} and {@link TcpCommunicationSpi}. * <p> - * The threshold helps to detect failed nodes in a cluster topology quickly and keep working only with the nodes - * that are alive. 
+ * If the threshold is set for a server node then it helps to detect failed nodes in a cluster topology during the + * time that is equal to the threshold's value and keep working only with the nodes that are alive. + * <p> + * If it's set for a client node then the client node will be able to detect a disconnection from its router node + * during the time equal to the threshold's value. * <p> * The failure detection threshold is an easy and straightforward way to setup {@link TcpDiscoverySpi} and * {@link TcpCommunicationSpi} depending on network conditions of a cluster. On the other hand if advanced setting http://git-wip-us.apache.org/repos/asf/incubator-ignite/blob/f714fd0a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java ---------------------------------------------------------------------- diff --git a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java index b085b3d..99a3ee2 100644 --- a/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java +++ b/modules/core/src/main/java/org/apache/ignite/spi/discovery/tcp/ServerImpl.java @@ -1828,9 +1828,6 @@ class ServerImpl extends TcpDiscoveryImpl { if (msg instanceof TcpDiscoveryJoinRequestMessage) processJoinRequestMessage((TcpDiscoveryJoinRequestMessage)msg); - else if (msg instanceof TcpDiscoveryConnectionCheckMessage) - processConnectionCheckMessage((TcpDiscoveryConnectionCheckMessage)msg); - else if (msg instanceof TcpDiscoveryClientReconnectMessage) processClientReconnectMessage((TcpDiscoveryClientReconnectMessage)msg); @@ -2210,7 +2207,15 @@ class ServerImpl extends TcpDiscoveryImpl { } } - prepareNodeAddedMessage(msg, next.id(), pendingMsgs.msgs, pendingMsgs.discardId); + if (msg instanceof TcpDiscoveryConnectionCheckMessage) { + if (!next.version().greaterThanEqual(TcpDiscoverySpi.FAILURE_DETECTION_MAJOR_VER, + TcpDiscoverySpi.FAILURE_DETECTION_MINOR_VER, 
+ TcpDiscoverySpi.FAILURE_DETECTION_MAINT_VER)) + // Preserve backward compatibility with nodes of older versions. + msg = new TcpDiscoveryStatusCheckMessage(locNode, null); + } + else + prepareNodeAddedMessage(msg, next.id(), pendingMsgs.msgs, pendingMsgs.discardId); try { long tstamp = U.currentTimeMillis(); @@ -3794,37 +3799,6 @@ class ServerImpl extends TcpDiscoveryImpl { } /** - * Processes connection check message. - * - * @param msg Connection check message. - */ - private void processConnectionCheckMessage(TcpDiscoveryConnectionCheckMessage msg) { - assert msg.creatorNodeId().equals(getLocalNodeId()) && msg.senderNodeId() == null; - - if (spiStateCopy() != CONNECTED) { - if (log.isDebugEnabled()) - log.debug("Connection check message discarded (local node is leaving topology)."); - - return; - } - - if (next == null) { - if (log.isDebugEnabled()) - log.debug("Connection check message discarded (no next node in topology)."); - - return; - } - - // Link to the 'next' node is updated only inside RingMessageWorker thread, no need to check on 'null'. - if (!next.version().greaterThanEqual(TcpDiscoverySpi.FAILURE_DETECTION_MAJOR_VER, - TcpDiscoverySpi.FAILURE_DETECTION_MINOR_VER, TcpDiscoverySpi.FAILURE_DETECTION_MAINT_VER)) { - // Preserve backward compatibility with nodes of older versions. - processStatusCheckMessage(new TcpDiscoveryStatusCheckMessage(locNode, null)); - } else if (ring.hasRemoteNodes()) - sendMessageAcrossRing(msg); - } - - /** * @param nodeId Node ID. * @param metrics Metrics. * @param cacheMetrics Cache metrics. @@ -4081,6 +4055,9 @@ class ServerImpl extends TcpDiscoveryImpl { ", connCheckFreq=" + connCheckFreq + ']'); failureThresholdReached = true; + + // Reset sent time deliberately to force sending connection check message. 
+ lastTimeConnCheckMsgSent = 0; } long elapsed = (lastTimeConnCheckMsgSent + connCheckFreq) - U.currentTimeMillis(); @@ -4089,7 +4066,7 @@ class ServerImpl extends TcpDiscoveryImpl { return; if (ring.hasRemoteNodes()) { - processConnectionCheckMessage(new TcpDiscoveryConnectionCheckMessage(locNode)); + sendMessageAcrossRing(new TcpDiscoveryConnectionCheckMessage(locNode)); lastTimeConnCheckMsgSent = U.currentTimeMillis(); }