ACCUMULO-2005: Add a 3rd argument to functional test RunTests.java to allow a timeout factor for mapred.task.timeout and run.py -f flag
Signed-off-by: Sean Busbey <bus...@cloudera.com> Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/6faac421 Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/6faac421 Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/6faac421 Branch: refs/heads/ACCUMULO-2061 Commit: 6faac421b560c9ce92f1d4db3c0b60d830498a71 Parents: 4fd48fd Author: Hung Pham <hp...@cloudera.com> Authored: Fri Jan 17 09:36:44 2014 -0500 Committer: Sean Busbey <bus...@cloudera.com> Committed: Mon Mar 10 09:05:46 2014 -0500 ---------------------------------------------------------------------- .../server/test/functional/RunTests.java | 27 +++++++++++++++++--- test/system/auto/README | 10 ++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/6faac421/src/server/src/main/java/org/apache/accumulo/server/test/functional/RunTests.java ---------------------------------------------------------------------- diff --git a/src/server/src/main/java/org/apache/accumulo/server/test/functional/RunTests.java b/src/server/src/main/java/org/apache/accumulo/server/test/functional/RunTests.java index 4dac215..31e3198 100644 --- a/src/server/src/main/java/org/apache/accumulo/server/test/functional/RunTests.java +++ b/src/server/src/main/java/org/apache/accumulo/server/test/functional/RunTests.java @@ -73,12 +73,15 @@ public class RunTests extends Configured implements Tool { private static final Logger log = Logger.getLogger(RunTests.class); private Job job = null; - + static private Integer timeoutFactor = 1; + static final String TIMEOUT_FACTOR = RunTests.class.getName() + ".timeoutFactor"; + static public class TestMapper extends Mapper<LongWritable,Text,Text,Text> { private static final String REDUCER_RESULT_START = "::::: "; private static final int RRS_LEN = REDUCER_RESULT_START.length(); private Text result = new Text(); + String mapperTimeoutFactor = null; private static enum Outcome { SUCCESS, FAILURE, ERROR, UNEXPECTED_SUCCESS, EXPECTED_FAILURE @@ -95,7 +98,7 @@ public class RunTests extends Configured implements Tool { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - List<String> cmd = Arrays.asList("/usr/bin/python", "test/system/auto/run.py", "-m", "-t", value.toString()); + List<String> cmd = Arrays.asList("/usr/bin/python", "test/system/auto/run.py", "-m", "-f", mapperTimeoutFactor, "-t", value.toString()); log.info("Running test " + cmd); ProcessBuilder pb = new ProcessBuilder(cmd); pb.directory(new File(context.getConfiguration().get("accumulo.home"))); @@ -129,7 +132,11 @@ public class RunTests extends Configured implements Tool { p.waitFor(); } - + + @Override + protected void setup(Mapper.Context context) throws IOException, InterruptedException { + mapperTimeoutFactor = Integer.toString(context.getConfiguration().getInt(TIMEOUT_FACTOR, timeoutFactor)); + } } @Override @@ -141,7 +148,11 @@ public class RunTests extends Configured implements Tool { Configuration conf = job.getConfiguration(); conf.setInt("mapred.max.split.size", 40); conf.set("accumulo.home", System.getenv("ACCUMULO_HOME")); - conf.setInt("mapred.task.timeout", 8 * 60 * 1000); + + // Taking third argument as scaling factor to setting mapred.task.timeout + // and TIMEOUT_FACTOR + conf.setInt("mapred.task.timeout", timeoutFactor * 8 * 60 * 1000); + conf.setInt(TIMEOUT_FACTOR, timeoutFactor); conf.setBoolean("mapred.map.tasks.speculative.execution", false); // set input @@ -179,6 +190,14 @@ public class RunTests extends Configured implements Tool { * @throws Exception */ public static void main(String[] args) throws Exception { + if (args.length > 2) { + try { + timeoutFactor = Integer.parseInt(args[2]); + } catch (NumberFormatException e) { + log.error("timeoutFactor must be an integer: ", e); + System.exit(1); + } + } RunTests tests = new RunTests(); ToolRunner.run(new Configuration(), tests, args); tests.job.waitForCompletion(true); http://git-wip-us.apache.org/repos/asf/accumulo/blob/6faac421/test/system/auto/README ---------------------------------------------------------------------- diff --git a/test/system/auto/README b/test/system/auto/README index 45ed158..7722c63 100644 --- a/test/system/auto/README +++ b/test/system/auto/README @@ -69,6 +69,16 @@ cluster at your disposal, you can run the tests as a MapReduce job: The example above runs every test. You can trim the tests file to include only the tests you wish to run. +You may specify a 'timeout factor' via an optional integer as a third argument: + + $ ./bin/accumulo org.apache.accumulo.server.test.functional.RunTests \ + /user/hadoop/tests /user/hadoop/results timeout_factor + +Where 'timeout_factor' indicates how much we should scale up timeouts. It will +be used to set both mapred.task.timeout and the "-f" flag used by run.py. If +not given, 'timeout_factor' defaults to 1, which corresponds to a +mapred.task.timeout of 480 seconds. + In some clusters, the user under which MR jobs run is different from the user under which Accumulo is installed, and this can cause failures running the tests. Various configuration and permission changes can be made to help the