ACCUMULO-375: Made the minimum input split size configurable. git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1245684 13f79535-47bb-0310-9956-ffa450edef68
Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/e24faaf9 Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/e24faaf9 Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/e24faaf9 Branch: refs/heads/1.4.5-SNAPSHOT Commit: e24faaf99b05f5c1d68bf07444043ef9bf5ba048 Parents: 2e366aa Author: Adam Fuchs <afu...@apache.org> Authored: Fri Feb 17 18:00:57 2012 +0000 Committer: Adam Fuchs <afu...@apache.org> Committed: Fri Feb 17 18:00:57 2012 +0000 ---------------------------------------------------------------------- .../examples/wikisearch/ingest/WikipediaConfiguration.java | 5 +++++ .../wikisearch/ingest/WikipediaPartitionedIngester.java | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e24faaf9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java ---------------------------------------------------------------------- diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java index a84d90c..27a28a1 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java @@ -56,6 +56,7 @@ public class WikipediaConfiguration { public final static String BULK_INGEST_DIR = "wikipedia.bulk.ingest.dir"; public final static String BULK_INGEST_FAILURE_DIR = "wikipedia.bulk.ingest.failure.dir"; public final static String BULK_INGEST_BUFFER_SIZE = "wikipedia.bulk.ingest.buffer.size"; + public final static String PARTITIONED_INPUT_MIN_SPLIT_SIZE = "wikipedia.min.input.split.size"; public 
static String getUser(Configuration conf) { @@ -130,6 +131,10 @@ public class WikipediaConfiguration { return new Path(conf.get(PARTITIONED_ARTICLES_DIRECTORY)); } + public static long getMinInputSplitSize(Configuration conf) { + return conf.getLong(PARTITIONED_INPUT_MIN_SPLIT_SIZE, 1l << 27); + } + public static boolean runPartitioner(Configuration conf) { return conf.getBoolean(RUN_PARTITIONER, false); } http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/e24faaf9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java ---------------------------------------------------------------------- diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java index bcdee43..90b8308 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java @@ -217,7 +217,8 @@ public class WikipediaPartitionedIngester extends Configured implements Tool { // setup input format ingestJob.setInputFormatClass(SequenceFileInputFormat.class); SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf)); - SequenceFileInputFormat.setMinInputSplitSize(ingestJob, 1l << 28); + // TODO make split size configurable + SequenceFileInputFormat.setMinInputSplitSize(ingestJob, WikipediaConfiguration.getMinInputSplitSize(ingestConf)); // setup output format ingestJob.setMapOutputKeyClass(Text.class);