This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch 3.1 in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/3.1 by this push: new 3e85f58ab9 Do not fall back to tablet servers when no scan servers by default (#4814) 3e85f58ab9 is described below commit 3e85f58ab923727ecfc21db1c696de2352da54e2 Author: Keith Turner <ktur...@apache.org> AuthorDate: Tue Aug 27 14:24:36 2024 -0700 Do not fall back to tablet servers when no scan servers by default (#4814) By default when using eventual scans and there are no scan servers present then the scan will fall back to tablet servers. This could be very surprising and disruptive when tablet servers do not have much resources allocated for scanning and scan servers do. In the case were scan servers are temporarily down it could cause a thundering herd that overwhelms tablet servers. This commit changes the default behavior to not fall back to tablet servers. This change is targeting 3.1 in order to avoid changing default behavior in 2.1.X. The capability to not fall back to tablet servers was introduced in 2.1.3 via #4671 with a default behavior to fallback. --- .../spi/scan/ConfigurableScanServerSelector.java | 13 +++--- .../scan/ConfigurableScanServerSelectorTest.java | 34 +++++++++++----- .../test/ScanServerGroupConfigurationIT.java | 2 + .../accumulo/test/ScanServerIT_NoServers.java | 46 +++++++++++----------- 4 files changed, 54 insertions(+), 41 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelector.java b/core/src/main/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelector.java index a7900645ed..129209d9da 100644 --- a/core/src/main/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelector.java +++ b/core/src/main/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelector.java @@ -97,12 +97,11 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; * <li>"s" for seconds</li> * <li>"ms" for milliseconds</li> * </ul> - * If duration is not specified this setting defaults to 0s, and will disable the wait for scan - * servers and will fall back to tablet servers immediately. When set to a large value, the selector - * will effectively wait for scan servers to become available before falling back to tablet servers. - * To ensure the selector never falls back scanning tablet servers an unrealistic wait time can be - * set. For instanced should be sufficient. Setting Waiting for scan servers is done via - * {@link org.apache.accumulo.core.spi.scan.ScanServerSelector.SelectorParameters#waitUntil(Supplier, Duration, String)}</li> + * If duration is not specified this setting defaults to 100 years, which for all practical purposes + * will never fall back to tablet servers. Waiting for scan servers is done via + * {@link org.apache.accumulo.core.spi.scan.ScanServerSelector.SelectorParameters#waitUntil(Supplier, Duration, String)}. + * To immediately fall back to tablet servers when no scan servers are present set this to + * zero.</li> * <li><b>attemptPlans : </b> A list of configuration to use for each scan attempt. Each list object * has the following fields: * <ul> @@ -249,7 +248,7 @@ public class ConfigurableScanServerSelector implements ScanServerSelector { int busyTimeoutMultiplier; String maxBusyTimeout; String group = ScanServerSelector.DEFAULT_SCAN_SERVER_GROUP_NAME; - String timeToWaitForScanServers = "0s"; + String timeToWaitForScanServers = 100 * 365 + "d"; transient boolean parsed = false; transient long parsedMaxBusyTimeout; diff --git a/core/src/test/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelectorTest.java b/core/src/test/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelectorTest.java index 54de138520..bb396889c1 100644 --- a/core/src/test/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelectorTest.java +++ b/core/src/test/java/org/apache/accumulo/core/spi/scan/ConfigurableScanServerSelectorTest.java @@ -37,6 +37,7 @@ import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.accumulo.core.client.TimedOutException; import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.TabletId; import org.apache.accumulo.core.dataImpl.KeyExtent; @@ -46,6 +47,8 @@ import org.apache.accumulo.core.util.UtilWaitThread; import org.apache.hadoop.io.Text; import org.junit.jupiter.api.Test; +import com.google.common.base.Preconditions; + public class ConfigurableScanServerSelectorTest { static class InitParams implements ScanServerSelector.InitParameters { @@ -140,7 +143,8 @@ public class ConfigurableScanServerSelectorTest { @Override public <T> Optional<T> waitUntil(Supplier<Optional<T>> condition, Duration maxWaitTime, String description) { - throw new UnsupportedOperationException(); + Preconditions.checkArgument(maxWaitTime.compareTo(Duration.ZERO) > 0); + throw new TimedOutException("Timed out w/o checking condition"); } } @@ -313,7 +317,7 @@ public class ConfigurableScanServerSelectorTest { // Intentionally put the default profile in 2nd position. There was a bug where config parsing // would fail if the default did not come first. var opts = Map.of("profiles", - "[" + profile1 + ", " + defaultProfile + "," + profile2 + "]".replace('\'', '"')); + ("[" + profile1 + ", " + defaultProfile + "," + profile2 + "]").replace('\'', '"')); runBusyTest(1000, 0, 5, 5, opts); runBusyTest(1000, 1, 20, 33, opts); @@ -372,14 +376,14 @@ public class ConfigurableScanServerSelectorTest { + "'attemptPlans':[{'servers':'100%', 'busyTimeout':'10m'}]}"; var opts1 = Map.of("profiles", - "[" + defaultProfile + ", " + profile1 + "," + profile2 + "]".replace('\'', '"')); + ("[" + defaultProfile + ", " + profile1 + "," + profile2 + "]").replace('\'', '"')); // two profiles activate on the scan type "mega", so should fail var exception = assertThrows(IllegalArgumentException.class, () -> runBusyTest(1000, 0, 5, 66, opts1)); assertTrue(exception.getMessage().contains("mega")); - var opts2 = Map.of("profiles", "[" + profile1 + "]".replace('\'', '"')); + var opts2 = Map.of("profiles", ("[" + profile1 + "]").replace('\'', '"')); // missing a default profile, so should fail exception = @@ -391,11 +395,21 @@ public class ConfigurableScanServerSelectorTest { @Test public void testNoScanServers() { - ConfigurableScanServerSelector selector = new ConfigurableScanServerSelector(); - selector.init(new InitParams(Set.of())); - + // run a 1st test assuming default config does not fallback to tservers, so should timeout + var selector1 = new ConfigurableScanServerSelector(); + selector1.init(new InitParams(Set.of())); var tabletId = nti("1", "m"); - ScanServerSelections actions = selector.selectServers(new SelectorParams(tabletId)); + assertThrows(TimedOutException.class, + () -> selector1.selectServers(new SelectorParams(tabletId))); + + // run a 2nd test with config that does fall back to tablet servers + String profile = "{'isDefault':'true','maxBusyTimeout':'60m','busyTimeoutMultiplier':2," + + " 'timeToWaitForScanServers': '0s'," + + "'attemptPlans':[{'servers':'100%', 'busyTimeout':'10m'}]}"; + var opts1 = Map.of("profiles", ("[" + profile + "]").replace('\'', '"')); + var selector2 = new ConfigurableScanServerSelector(); + selector2.init(new InitParams(Set.of(), opts1)); + ScanServerSelections actions = selector2.selectServers(new SelectorParams(tabletId)); assertNull(actions.getScanServer(tabletId)); assertEquals(Duration.ZERO, actions.getDelay()); assertEquals(Duration.ZERO, actions.getBusyTimeout()); @@ -416,7 +430,7 @@ public class ConfigurableScanServerSelectorTest { + "'attemptPlans':[{'servers':'100%', 'busyTimeout':'10m'}]}"; var opts = Map.of("profiles", - "[" + defaultProfile + ", " + profile1 + "," + profile2 + "]".replace('\'', '"')); + ("[" + defaultProfile + ", " + profile1 + "," + profile2 + "]").replace('\'', '"')); ConfigurableScanServerSelector selector = new ConfigurableScanServerSelector(); var dg = ScanServerSelector.DEFAULT_SCAN_SERVER_GROUP_NAME; @@ -493,7 +507,7 @@ public class ConfigurableScanServerSelectorTest { "{'isDefault':true,'maxBusyTimeout':'5m','busyTimeoutMultiplier':4,'timeToWaitForScanServers':'120s'," + "'attemptPlans':[{'servers':'100%', 'busyTimeout':'60s'}]}"; - var opts = Map.of("profiles", "[" + defaultProfile + "]".replace('\'', '"')); + var opts = Map.of("profiles", ("[" + defaultProfile + "]").replace('\'', '"')); ConfigurableScanServerSelector selector = new ConfigurableScanServerSelector(); diff --git a/test/src/main/java/org/apache/accumulo/test/ScanServerGroupConfigurationIT.java b/test/src/main/java/org/apache/accumulo/test/ScanServerGroupConfigurationIT.java index 6773251223..0e2aa02e8d 100644 --- a/test/src/main/java/org/apache/accumulo/test/ScanServerGroupConfigurationIT.java +++ b/test/src/main/java/org/apache/accumulo/test/ScanServerGroupConfigurationIT.java @@ -57,6 +57,7 @@ public class ScanServerGroupConfigurationIT extends SharedMiniClusterBase { " \"maxBusyTimeout\": \"5m\","+ " \"busyTimeoutMultiplier\": 8,"+ " \"scanTypeActivations\": [],"+ + " \"timeToWaitForScanServers\":\"0s\","+ " \"attemptPlans\": ["+ " {"+ " \"servers\": \"3\","+ @@ -80,6 +81,7 @@ public class ScanServerGroupConfigurationIT extends SharedMiniClusterBase { " \"busyTimeoutMultiplier\": 8,"+ " \"group\": \"GROUP1\","+ " \"scanTypeActivations\": [\"use_group1\"],"+ + " \"timeToWaitForScanServers\":\"0s\","+ " \"attemptPlans\": ["+ " {"+ " \"servers\": \"3\","+ diff --git a/test/src/main/java/org/apache/accumulo/test/ScanServerIT_NoServers.java b/test/src/main/java/org/apache/accumulo/test/ScanServerIT_NoServers.java index f8dde71bbf..6868d91a9c 100644 --- a/test/src/main/java/org/apache/accumulo/test/ScanServerIT_NoServers.java +++ b/test/src/main/java/org/apache/accumulo/test/ScanServerIT_NoServers.java @@ -85,7 +85,16 @@ public class ScanServerIT_NoServers extends SharedMiniClusterBase { @Test public void testScan() throws Exception { - try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) { + var clientProps = new Properties(); + clientProps.putAll(getClientProps()); + String scanServerSelectorProfiles = "[{'isDefault':true,'maxBusyTimeout':'5m'," + + "'busyTimeoutMultiplier':8, 'scanTypeActivations':[], 'timeToWaitForScanServers':'0s'," + + "'attemptPlans':[{'servers':'3', 'busyTimeout':'1s'}]}]"; + clientProps.put("scan.server.selector.impl", ConfigurableScanServerSelector.class.getName()); + clientProps.put("scan.server.selector.opts.profiles", + scanServerSelectorProfiles.replace("'", "\"")); + + try (AccumuloClient client = Accumulo.newClient().from(clientProps).build()) { String tableName = getUniqueNames(1)[0]; final int ingestedEntryCount = createTableAndIngest(client, tableName, null, 10, 10, "colf"); @@ -107,7 +116,16 @@ public class ScanServerIT_NoServers extends SharedMiniClusterBase { @Test public void testBatchScan() throws Exception { - try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) { + var clientProps = new Properties(); + clientProps.putAll(getClientProps()); + String scanServerSelectorProfiles = "[{'isDefault':true,'maxBusyTimeout':'5m'," + + "'busyTimeoutMultiplier':8, 'scanTypeActivations':[], 'timeToWaitForScanServers':'0s'," + + "'attemptPlans':[{'servers':'3', 'busyTimeout':'1s'}]}]"; + clientProps.put("scan.server.selector.impl", ConfigurableScanServerSelector.class.getName()); + clientProps.put("scan.server.selector.opts.profiles", + scanServerSelectorProfiles.replace("'", "\"")); + + try (AccumuloClient client = Accumulo.newClient().from(clientProps).build()) { String tableName = getUniqueNames(1)[0]; final int ingestedEntryCount = createTableAndIngest(client, tableName, null, 10, 10, "colf"); @@ -128,17 +146,7 @@ public class ScanServerIT_NoServers extends SharedMiniClusterBase { @Test public void testScanWithNoTserverFallback() throws Exception { - - var clientProps = new Properties(); - clientProps.putAll(getClientProps()); - String scanServerSelectorProfiles = "[{'isDefault':true,'maxBusyTimeout':'5m'," - + "'busyTimeoutMultiplier':8, 'scanTypeActivations':[], 'timeToWaitForScanServers':'120s'," - + "'attemptPlans':[{'servers':'3', 'busyTimeout':'1s'}]}]"; - clientProps.put("scan.server.selector.impl", ConfigurableScanServerSelector.class.getName()); - clientProps.put("scan.server.selector.opts.profiles", - scanServerSelectorProfiles.replace("'", "\"")); - - try (AccumuloClient client = Accumulo.newClient().from(clientProps).build()) { + try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) { String tableName = getUniqueNames(1)[0]; createTableAndIngest(client, tableName, null, 10, 10, "colf"); @@ -156,17 +164,7 @@ public class ScanServerIT_NoServers extends SharedMiniClusterBase { @Test public void testBatchScanWithNoTserverFallback() throws Exception { - - var clientProps = new Properties(); - clientProps.putAll(getClientProps()); - String scanServerSelectorProfiles = "[{'isDefault':true,'maxBusyTimeout':'5m'," - + "'busyTimeoutMultiplier':8, 'scanTypeActivations':[], 'timeToWaitForScanServers':'120s'," - + "'attemptPlans':[{'servers':'3', 'busyTimeout':'1s'}]}]"; - clientProps.put("scan.server.selector.impl", ConfigurableScanServerSelector.class.getName()); - clientProps.put("scan.server.selector.opts.profiles", - scanServerSelectorProfiles.replace("'", "\"")); - - try (AccumuloClient client = Accumulo.newClient().from(clientProps).build()) { + try (AccumuloClient client = Accumulo.newClient().from(getClientProps()).build()) { String tableName = getUniqueNames(1)[0]; createTableAndIngest(client, tableName, null, 10, 10, "colf");