Yes. The current fix lets each gateway receiver (in hydra tests, there are a lot of them) compete for port 5500. Only one member will win; all other members will time out after 2 minutes. Then they go on to compete for port 5501. Again, only one member will win.
In that case, if there are 5 receivers, it will take 10 minutes to start all the receivers. So I enhanced the current fix (see the attached diff) so that each receiver picks a random port in the range to start from; if binding to that port fails, only that receiver will try currPort++. When it reaches endPort, it continues from startPort, until it reaches its random starting port again. Improving the 2-minute timeout itself is definitely a separate issue. Regards, Gester On Tue, Sep 4, 2018 at 4:38 PM, Dan Smith <dsm...@pivotal.io> wrote: > Splitting this into a separate thread. > > I see the issue. The two minute timeout is in the constructor for > AcceptorImpl, where it retries to bind for 2 minutes. > > That behavior makes sense for CacheServer.start. > > But it doesn't make sense for the new logic in GatewayReceiver.start() from > GEODE-5591. That code is trying to use CacheServer.start to scan for an > available port, trying each port in a range. That free port finding logic > really doesn't want to have two minutes of retries for each port. It seems > like we need to rework the fix for GEODE-5591. > > Does it make sense to hold up the release to rework this fix, or should we > just revert it? Have we switched concourse over to using alpine linux, > which I think was the original motivation for this fix? > > -Dan > > On Tue, Sep 4, 2018 at 4:25 PM, Dan Smith <dsm...@pivotal.io> wrote: > > > Why is it waiting at all in this case? Where is this 2 minute timeout > > coming from? > > > > -Dan > > > > On Tue, Sep 4, 2018 at 4:12 PM, Sai Boorlagadda < > sai.boorlaga...@gmail.com > > > wrote: > > > >> So the issue is that it takes longer to start than previous releases? > >> Also, is this wait time only when using Gfsh to create gateway-receiver? > >> > >> On Tue, Sep 4, 2018 at 4:03 PM Nabarun Nag <n...@apache.org> wrote: > >> > >> > Currently we have a minor issue in the release branch as pointed out > by > >> > Barry O. > >> > We will wait till a resolution is figured out for this issue. > >> > > >> > Steps: > >> > 1. 
create locator > >> > 2. start server --name=server1 --server-port=40404 > >> > 3. start server --name=server2 --server-port=40405 > >> > 4. create gateway-receiver --member=server1 > >> > 5. create gateway-receiver --member=server2 `This gets stuck for 2 > >> minutes` > >> > > >> > Is the 2 minute wait time acceptable? Should we document it? When we > >> revert > >> > GEODE-5591, this issue does not happen. > >> > > >> > Regards > >> > Nabarun Nag > >> > > >> > > >
diff --git a/geode-wan/src/distributedTest/java/org/apache/geode/internal/cache/wan/WANTestBase.java b/geode-wan/src/distributedTest/java/org/apache/geode/internal/cache/wan/WANTestBase.java index a09194209..e13e7ec78 100644 --- a/geode-wan/src/distributedTest/java/org/apache/geode/internal/cache/wan/WANTestBase.java +++ b/geode-wan/src/distributedTest/java/org/apache/geode/internal/cache/wan/WANTestBase.java @@ -2020,7 +2020,7 @@ public class WANTestBase extends DistributedTestCase { GatewayReceiver receiver = fact.create(); assertThatThrownBy(receiver::start) .isInstanceOf(GatewayReceiverException.class) - .hasMessageContaining("No available free port found in the given range"); + .hasMessageContaining("Failed to create server socket on"); } public static int createReceiverWithSSL(int locPort) { diff --git a/geode-wan/src/integrationTest/java/org/apache/geode/internal/cache/wan/misc/WANConfigurationJUnitTest.java b/geode-wan/src/integrationTest/java/org/apache/geode/internal/cache/wan/misc/WANConfigurationJUnitTest.java index 038b759ae..ccd9503e6 100644 --- a/geode-wan/src/integrationTest/java/org/apache/geode/internal/cache/wan/misc/WANConfigurationJUnitTest.java +++ b/geode-wan/src/integrationTest/java/org/apache/geode/internal/cache/wan/misc/WANConfigurationJUnitTest.java @@ -448,7 +448,8 @@ public class WANConfigurationJUnitTest { GatewayReceiver receiver = fact.create(); - assertThatThrownBy(() -> receiver.start()).isInstanceOf(GatewayReceiverException.class); + assertThatThrownBy(() -> receiver.start()).isInstanceOf(GatewayReceiverException.class) + .hasMessageContaining("Failed to create server socket on"); } @Test diff --git a/geode-wan/src/main/java/org/apache/geode/internal/cache/wan/GatewayReceiverImpl.java b/geode-wan/src/main/java/org/apache/geode/internal/cache/wan/GatewayReceiverImpl.java index cd2702991..786b354a4 100644 --- a/geode-wan/src/main/java/org/apache/geode/internal/cache/wan/GatewayReceiverImpl.java +++ 
b/geode-wan/src/main/java/org/apache/geode/internal/cache/wan/GatewayReceiverImpl.java @@ -26,6 +26,7 @@ import org.apache.geode.cache.wan.GatewayReceiver; import org.apache.geode.cache.wan.GatewayTransportFilter; import org.apache.geode.distributed.internal.InternalDistributedSystem; import org.apache.geode.distributed.internal.ResourceEvent; +import org.apache.geode.internal.AvailablePort; import org.apache.geode.internal.cache.CacheServerImpl; import org.apache.geode.internal.cache.InternalCache; import org.apache.geode.internal.i18n.LocalizedStrings; @@ -144,8 +145,9 @@ public class GatewayReceiverImpl implements GatewayReceiver { return; } - for (int port = this.startPort; port <= this.endPort; port++) { - receiver.setPort(port); + int loopStartPort = getPortToStart(); + for (int currPort = loopStartPort; currPort <= endPort; currPort++) { + receiver.setPort(currPort); receiver.setSocketBufferSize(socketBufferSize); receiver.setMaximumTimeBetweenPings(timeBetPings); if (hostnameForSenders != null && !hostnameForSenders.isEmpty()) { @@ -155,19 +157,35 @@ public class GatewayReceiverImpl implements GatewayReceiver { receiver.setGroups(new String[] {GatewayReceiver.RECEIVER_GROUP}); ((CacheServerImpl) receiver).setGatewayTransportFilter(this.filters); try { + this.port = currPort; receiver.start(); - this.port = port; break; } catch (IOException e) { - if (port == this.endPort) { - throw new GatewayReceiverException("No available free port found in the given range (" + + if (currPort == this.endPort && startPort != endPort) { + currPort = this.startPort; + logger.info("loopback to " + this.startPort); + } else if (currPort == loopStartPort - 1 || startPort == endPort) { + logger.warn("No available free port found in the given range (" + this.startPort + "-" + this.endPort + ")", e); + throw new GatewayReceiverException( + LocalizedStrings.SocketCreator_FAILED_TO_CREATE_SERVER_SOCKET_ON_0_1 + .toLocalizedString(new Object[] {bindAdd, this.port})); + } else { + if 
(e.getCause() != null && e.getCause().getMessage() + .contains("assign requested address")) { + throw new GatewayReceiverException( + LocalizedStrings.SocketCreator_FAILED_TO_CREATE_SERVER_SOCKET_ON_0_1 + .toLocalizedString(new Object[] {bindAdd, this.port})); + } else { + logger.warn(LocalizedMessage + .create(LocalizedStrings.GatewayReceiver_Address_Already_In_Use, this.port), + new Exception()); + } } } - } logger .info(LocalizedMessage.create(LocalizedStrings.GatewayReceiver_STARTED_ON_PORT, this.port)); @@ -177,6 +195,18 @@ public class GatewayReceiverImpl implements GatewayReceiver { } + private int getPortToStart() { + // choose a random port from the given port range + int rPort; + if (this.startPort == this.endPort) { + rPort = this.startPort; + } else { + rPort = AvailablePort.getRandomAvailablePortInRange(this.startPort, this.endPort, + AvailablePort.SOCKET); + } + return rPort; + } + public void stop() { if (!isRunning()) { throw new GatewayReceiverException(