Repository: accumulo-wikisearch
Updated Branches:
  refs/heads/master f11759eb5 -> 1990979f6


ACCUMULO-375 added compression and increased the minimum split size

git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1241940 13f79535-47bb-0310-9956-ffa450edef68


Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/57bf9cf3
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/57bf9cf3
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/57bf9cf3

Branch: refs/heads/master
Commit: 57bf9cf34b891c12d4361e8d5224a2aa38b63a6c
Parents: 266455b
Author: Adam Fuchs <afu...@apache.org>
Authored: Wed Feb 8 15:37:04 2012 +0000
Committer: Adam Fuchs <afu...@apache.org>
Committed: Wed Feb 8 15:37:04 2012 +0000

----------------------------------------------------------------------
 .../examples/wikisearch/ingest/WikipediaPartitionedIngester.java | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/57bf9cf3/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
index 43f5e29..5571290 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
@@ -178,6 +179,8 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     partitionerJob.setOutputFormatClass(SequenceFileOutputFormat.class);
     Path outputDir = WikipediaConfiguration.getPartitionedArticlesPath(partitionerConf);
     SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir);
+    SequenceFileOutputFormat.setCompressOutput(partitionerJob, true);
+    SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD);
     
     return partitionerJob.waitForCompletion(true) ? 0 : 1;
   }
@@ -209,6 +212,7 @@ public class WikipediaPartitionedIngester extends Configured implements Tool {
     // setup input format
     ingestJob.setInputFormatClass(SequenceFileInputFormat.class);
     SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf));
+    SequenceFileInputFormat.setMinInputSplitSize(ingestJob, 1l << 28);
 
     // setup output format
     ingestJob.setMapOutputKeyClass(Text.class);

Reply via email to