[ https://issues.apache.org/jira/browse/GEODE-8656?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17234920#comment-17234920 ]
ASF GitHub Bot commented on GEODE-8656: --------------------------------------- bschuchardt commented on a change in pull request #5670: URL: https://github.com/apache/geode/pull/5670#discussion_r525594024 ########## File path: geode-assembly/src/acceptanceTest/java/org/apache/geode/cache/wan/SeveralGatewayReceiversWithSamePortAndHostnameForSenders.java ########## @@ -0,0 +1,260 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.geode.cache.wan; + +import static com.palantir.docker.compose.execution.DockerComposeExecArgument.arguments; +import static com.palantir.docker.compose.execution.DockerComposeExecOption.options; +import static org.apache.geode.cache.Region.SEPARATOR; +import static org.apache.geode.distributed.ConfigurationProperties.DISTRIBUTED_SYSTEM_ID; +import static org.apache.geode.distributed.ConfigurationProperties.LOCATORS; +import static org.apache.geode.distributed.ConfigurationProperties.REMOTE_LOCATORS; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.io.File; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.StringTokenizer; + +import com.palantir.docker.compose.DockerComposeRule; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.geode.cache.Cache; +import org.apache.geode.cache.CacheFactory; +import org.apache.geode.cache.PartitionAttributesFactory; +import org.apache.geode.cache.Region; +import org.apache.geode.cache.RegionFactory; +import org.apache.geode.cache.RegionShortcut; +import org.apache.geode.cache.persistence.PartitionOfflineException; +import org.apache.geode.client.sni.NotOnWindowsDockerRule; +import org.apache.geode.distributed.Locator; +import org.apache.geode.internal.cache.ForceReattemptException; +import org.apache.geode.internal.cache.PoolStats; +import org.apache.geode.internal.cache.wan.AbstractGatewaySender; +import org.apache.geode.internal.cache.wan.InternalGatewaySenderFactory; +import org.apache.geode.test.dunit.IgnoredException; +import org.apache.geode.test.dunit.VM; +import org.apache.geode.test.dunit.rules.DistributedRule; +import org.apache.geode.test.junit.categories.WanTest; + + +/** + * These tests use two Geode sites: + * + * - One site (the remote one) consisting of a 2-server, 1-locator Geode cluster. + * The servers host a partition region (region-wan) and have gateway senders to receive events + * from the other site with the same value for hostname-for-senders and listening on the + * same port (2324). + * The servers and locator run each inside a Docker container and are not route-able + * from the host (where this JUnit test is running). + * Another Docker container is running the HAProxy image and it's set up as a TCP load balancer. + * The other site connects to the locator and to the gateway receivers via the + * TCP load balancer that forwards traffic directed to the 20334 port to the locator and + * traffic directed to the 2324 port to the receivers in a round robin fashion. + * + * - Another site consisting of a 1-server, 1-locator Geode cluster. + * The server hosts a partition region (region-wan) and has a parallel gateway receiver + * to send events to the remote site. + * + * The aim of the tests is verify that when several gateway receivers in a remote site + * share the same port and hostname-for-senders, the pings sent from the gateway senders + * reach the right gateway receiver and not just any of the receivers. Failure to do this + * may result in the closing of connections by a gateway receiver for not having + * received the ping in time. + */ +@Category({WanTest.class}) +public class SeveralGatewayReceiversWithSamePortAndHostnameForSenders { + + private static final int NUM_SERVERS = 2; + + private static Cache cache; + + private static final URL DOCKER_COMPOSE_PATH = + SeveralGatewayReceiversWithSamePortAndHostnameForSenders.class + .getResource("docker-compose.yml"); + + // Docker compose does not work on windows in CI. Ignore this test on windows + // Using a RuleChain to make sure we ignore the test before the rule comes into play + @ClassRule + public static NotOnWindowsDockerRule docker = + new NotOnWindowsDockerRule(() -> DockerComposeRule.builder() + .file(DOCKER_COMPOSE_PATH.getPath()).build()); + + @Rule + public DistributedRule distributedRule = + DistributedRule.builder().withVMCount(NUM_SERVERS + 1).build(); + + @BeforeClass + public static void beforeClass() throws Exception { + // Start locator + docker.get().exec(options("-T"), "locator", + arguments("gfsh", "run", "--file=/geode/scripts/geode-starter-locator.gfsh")); + // Start server1 + docker.get().exec(options("-T"), "server1", + arguments("gfsh", "run", "--file=/geode/scripts/geode-starter-server1.gfsh")); + // Start server2 + docker.get().exec(options("-T"), "server2", + arguments("gfsh", "run", "--file=/geode/scripts/geode-starter-server2.gfsh")); + // Create partition region and gateway receiver + docker.get().exec(options("-T"), "locator", + arguments("gfsh", "run", "--file=/geode/scripts/geode-starter-create.gfsh")); + } + + public SeveralGatewayReceiversWithSamePortAndHostnameForSenders() { + super(); + } + + @Test + public void testPingsToReceiversWithSamePortAndHostnameForSendersReachTheRightReceivers() { + String senderId = "ln"; + String regionName = "region-wan"; + final int remoteLocPort = 20334; + + int locPort = createLocator(VM.getVM(0), 1, remoteLocPort); + + VM vm1 = VM.getVM(1); + createCache(vm1, locPort); + + // We must use more than one dispatcher thread. With just one dispatcher thread, only one + // connection will be created by the sender towards one of the receivers and it will be + // monitored by the one ping thread for that remote receiver. + // With more than one thread, several connections will be opened and there should be one ping + // thread per remote receiver. + createGatewaySender(vm1, senderId, 2, true, 5, + 5, GatewaySender.DEFAULT_ORDER_POLICY); + + createPartitionedRegion(vm1, regionName, senderId, 0, 10); + + int NUM_PUTS = 10; + + putKeyValues(vm1, NUM_PUTS, regionName); + + // Wait longer than the value set in the receivers for + // maximum-time-between-pings (60000) to verify that connections are not closed + // by the receivers because each has received the pings timely. + int maxTimeBetweenPingsInReceiver = 65000; + try { + Thread.sleep(maxTimeBetweenPingsInReceiver); + } catch (InterruptedException e) { + e.printStackTrace(); Review comment: While Intellij Idea likes to generate this kind of code it isn't useful when there's a failure. Get rid of the try/catch and add a "throws" to the test method instead. ########## File path: geode-assembly/src/acceptanceTest/java/org/apache/geode/cache/wan/SeveralGatewayReceiversWithSamePortAndHostnameForSenders.java ########## @@ -0,0 +1,260 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.geode.cache.wan; + +import static com.palantir.docker.compose.execution.DockerComposeExecArgument.arguments; +import static com.palantir.docker.compose.execution.DockerComposeExecOption.options; +import static org.apache.geode.cache.Region.SEPARATOR; +import static org.apache.geode.distributed.ConfigurationProperties.DISTRIBUTED_SYSTEM_ID; +import static org.apache.geode.distributed.ConfigurationProperties.LOCATORS; +import static org.apache.geode.distributed.ConfigurationProperties.REMOTE_LOCATORS; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.io.File; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.StringTokenizer; + +import com.palantir.docker.compose.DockerComposeRule; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.geode.cache.Cache; +import org.apache.geode.cache.CacheFactory; +import org.apache.geode.cache.PartitionAttributesFactory; +import org.apache.geode.cache.Region; +import org.apache.geode.cache.RegionFactory; +import org.apache.geode.cache.RegionShortcut; +import org.apache.geode.cache.persistence.PartitionOfflineException; +import org.apache.geode.client.sni.NotOnWindowsDockerRule; +import org.apache.geode.distributed.Locator; +import org.apache.geode.internal.cache.ForceReattemptException; +import org.apache.geode.internal.cache.PoolStats; +import org.apache.geode.internal.cache.wan.AbstractGatewaySender; +import org.apache.geode.internal.cache.wan.InternalGatewaySenderFactory; +import org.apache.geode.test.dunit.IgnoredException; +import org.apache.geode.test.dunit.VM; +import org.apache.geode.test.dunit.rules.DistributedRule; +import org.apache.geode.test.junit.categories.WanTest; + + +/** + * These tests use two Geode sites: + * + * - One site (the remote one) consisting of a 2-server, 1-locator Geode cluster. + * The servers host a partition region (region-wan) and have gateway senders to receive events + * from the other site with the same value for hostname-for-senders and listening on the + * same port (2324). + * The servers and locator run each inside a Docker container and are not route-able + * from the host (where this JUnit test is running). + * Another Docker container is running the HAProxy image and it's set up as a TCP load balancer. + * The other site connects to the locator and to the gateway receivers via the + * TCP load balancer that forwards traffic directed to the 20334 port to the locator and + * traffic directed to the 2324 port to the receivers in a round robin fashion. + * + * - Another site consisting of a 1-server, 1-locator Geode cluster. + * The server hosts a partition region (region-wan) and has a parallel gateway receiver + * to send events to the remote site. + * + * The aim of the tests is verify that when several gateway receivers in a remote site + * share the same port and hostname-for-senders, the pings sent from the gateway senders + * reach the right gateway receiver and not just any of the receivers. Failure to do this + * may result in the closing of connections by a gateway receiver for not having + * received the ping in time. + */ +@Category({WanTest.class}) +public class SeveralGatewayReceiversWithSamePortAndHostnameForSenders { Review comment: I don't know if it's a rule or not but the names of literally all of the other unit test classes in the Geode repository end with "Test", so maybe this could be SeveralGatewayReceiversWithSamePortAndHostnameForSenders**Test**. ########## File path: geode-core/src/main/java/org/apache/geode/cache/client/internal/LiveServerPinger.java ########## @@ -87,6 +87,7 @@ private void cancelFuture(Endpoint endpoint) { public void run2() { if (endpoint.timeToPing(pingIntervalNanos)) { try { + endpoint.updateLastExecute(); Review comment: (just a comment) I see that you've moved the updateLastExecute() for PingOp to LiveServerPinger. ########## File path: geode-assembly/src/acceptanceTest/java/org/apache/geode/cache/wan/SeveralGatewayReceiversWithSamePortAndHostnameForSenders.java ########## @@ -0,0 +1,260 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more contributor license + * agreements. See the NOTICE file distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. You may obtain a + * copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.geode.cache.wan; + +import static com.palantir.docker.compose.execution.DockerComposeExecArgument.arguments; +import static com.palantir.docker.compose.execution.DockerComposeExecOption.options; +import static org.apache.geode.cache.Region.SEPARATOR; +import static org.apache.geode.distributed.ConfigurationProperties.DISTRIBUTED_SYSTEM_ID; +import static org.apache.geode.distributed.ConfigurationProperties.LOCATORS; +import static org.apache.geode.distributed.ConfigurationProperties.REMOTE_LOCATORS; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.io.File; +import java.net.URL; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.StringTokenizer; + +import com.palantir.docker.compose.DockerComposeRule; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import org.apache.geode.cache.Cache; +import org.apache.geode.cache.CacheFactory; +import org.apache.geode.cache.PartitionAttributesFactory; +import org.apache.geode.cache.Region; +import org.apache.geode.cache.RegionFactory; +import org.apache.geode.cache.RegionShortcut; +import org.apache.geode.cache.persistence.PartitionOfflineException; +import org.apache.geode.client.sni.NotOnWindowsDockerRule; +import org.apache.geode.distributed.Locator; +import org.apache.geode.internal.cache.ForceReattemptException; +import org.apache.geode.internal.cache.PoolStats; +import org.apache.geode.internal.cache.wan.AbstractGatewaySender; +import org.apache.geode.internal.cache.wan.InternalGatewaySenderFactory; +import org.apache.geode.test.dunit.IgnoredException; +import org.apache.geode.test.dunit.VM; +import org.apache.geode.test.dunit.rules.DistributedRule; +import org.apache.geode.test.junit.categories.WanTest; + + +/** + * These tests use two Geode sites: + * + * - One site (the remote one) consisting of a 2-server, 1-locator Geode cluster. + * The servers host a partition region (region-wan) and have gateway senders to receive events + * from the other site with the same value for hostname-for-senders and listening on the + * same port (2324). + * The servers and locator run each inside a Docker container and are not route-able + * from the host (where this JUnit test is running). + * Another Docker container is running the HAProxy image and it's set up as a TCP load balancer. + * The other site connects to the locator and to the gateway receivers via the + * TCP load balancer that forwards traffic directed to the 20334 port to the locator and + * traffic directed to the 2324 port to the receivers in a round robin fashion. + * + * - Another site consisting of a 1-server, 1-locator Geode cluster. + * The server hosts a partition region (region-wan) and has a parallel gateway receiver + * to send events to the remote site. + * + * The aim of the tests is verify that when several gateway receivers in a remote site + * share the same port and hostname-for-senders, the pings sent from the gateway senders + * reach the right gateway receiver and not just any of the receivers. Failure to do this + * may result in the closing of connections by a gateway receiver for not having + * received the ping in time. + */ +@Category({WanTest.class}) +public class SeveralGatewayReceiversWithSamePortAndHostnameForSenders { + + private static final int NUM_SERVERS = 2; + + private static Cache cache; + + private static final URL DOCKER_COMPOSE_PATH = + SeveralGatewayReceiversWithSamePortAndHostnameForSenders.class + .getResource("docker-compose.yml"); + + // Docker compose does not work on windows in CI. Ignore this test on windows + // Using a RuleChain to make sure we ignore the test before the rule comes into play + @ClassRule + public static NotOnWindowsDockerRule docker = + new NotOnWindowsDockerRule(() -> DockerComposeRule.builder() + .file(DOCKER_COMPOSE_PATH.getPath()).build()); + + @Rule + public DistributedRule distributedRule = + DistributedRule.builder().withVMCount(NUM_SERVERS + 1).build(); + + @BeforeClass + public static void beforeClass() throws Exception { + // Start locator + docker.get().exec(options("-T"), "locator", + arguments("gfsh", "run", "--file=/geode/scripts/geode-starter-locator.gfsh")); + // Start server1 + docker.get().exec(options("-T"), "server1", + arguments("gfsh", "run", "--file=/geode/scripts/geode-starter-server1.gfsh")); + // Start server2 + docker.get().exec(options("-T"), "server2", + arguments("gfsh", "run", "--file=/geode/scripts/geode-starter-server2.gfsh")); + // Create partition region and gateway receiver + docker.get().exec(options("-T"), "locator", + arguments("gfsh", "run", "--file=/geode/scripts/geode-starter-create.gfsh")); + } + + public SeveralGatewayReceiversWithSamePortAndHostnameForSenders() { + super(); + } + + @Test + public void testPingsToReceiversWithSamePortAndHostnameForSendersReachTheRightReceivers() { + String senderId = "ln"; + String regionName = "region-wan"; + final int remoteLocPort = 20334; + + int locPort = createLocator(VM.getVM(0), 1, remoteLocPort); + + VM vm1 = VM.getVM(1); + createCache(vm1, locPort); + + // We must use more than one dispatcher thread. With just one dispatcher thread, only one + // connection will be created by the sender towards one of the receivers and it will be + // monitored by the one ping thread for that remote receiver. + // With more than one thread, several connections will be opened and there should be one ping + // thread per remote receiver. + createGatewaySender(vm1, senderId, 2, true, 5, + 5, GatewaySender.DEFAULT_ORDER_POLICY); + + createPartitionedRegion(vm1, regionName, senderId, 0, 10); + + int NUM_PUTS = 10; + + putKeyValues(vm1, NUM_PUTS, regionName); + + // Wait longer than the value set in the receivers for + // maximum-time-between-pings (60000) to verify that connections are not closed + // by the receivers because each has received the pings timely. + int maxTimeBetweenPingsInReceiver = 65000; Review comment: can the max be shortened so that this test doesn't take so long to run? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Ping not sent to the right gateway receiver endpoint when several receivers > have the same hostname-for-senders value > -------------------------------------------------------------------------------------------------------------------- > > Key: GEODE-8656 > URL: https://issues.apache.org/jira/browse/GEODE-8656 > Project: Geode > Issue Type: Bug > Components: wan > Reporter: Alberto Gomez > Assignee: Alberto Gomez > Priority: Major > Labels: pull-request-available > > When several gateway receivers have the same value for hostname-for-senders > (for example when running Geode under kubernetes and a load balancer balances > the load among the remote servers), the ping messages sent from the gateway > sender client are only sent to the receiver (endpoint) to which the sender > connected first. > The other receivers will not get the ping message which will imply that the > connections towards them will be closed after the configured timeout expires. > This ticket is a continuation of the work done ticket GEODE-7565. -- This message was sent by Atlassian Jira (v8.3.4#803005)