ACCUMULO-412 fix index search git-svn-id: https://svn.apache.org/repos/asf/incubator/accumulo/branches/1.4@1245631 13f79535-47bb-0310-9956-ffa450edef68
Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/27fa06e3 Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/27fa06e3 Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/27fa06e3 Branch: refs/heads/1.4.5-SNAPSHOT Commit: 27fa06e339eb1fcef4cabfa71576a017d7b109dc Parents: 1e05129 Author: Eric C. Newton <e...@apache.org> Authored: Fri Feb 17 16:03:47 2012 +0000 Committer: Eric C. Newton <e...@apache.org> Committed: Fri Feb 17 16:03:47 2012 +0000 ---------------------------------------------------------------------- .../ingest/WikipediaPartitionedMapper.java | 3 +-- .../wikisearch/ingest/WikipediaPartitioner.java | 21 +------------------- .../output/SortingRFileOutputFormat.java | 8 +++----- 3 files changed, 5 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/27fa06e3/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java ---------------------------------------------------------------------- diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java index 5e82a7d..bb4ae64 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedMapper.java @@ -42,14 +42,13 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; -import org.apache.log4j.Logger; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; public class WikipediaPartitionedMapper extends Mapper<Text,Article,Text,Mutation> { - private static final Logger log = Logger.getLogger(WikipediaPartitionedMapper.class); + // private static final Logger log = Logger.getLogger(WikipediaPartitionedMapper.class); public final static Charset UTF8 = Charset.forName("UTF-8"); public static final String DOCUMENT_COLUMN_FAMILY = "d"; http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/27fa06e3/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitioner.java ---------------------------------------------------------------------- diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitioner.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitioner.java index 82af9fd..3507108 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitioner.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitioner.java @@ -23,40 +23,21 @@ package org.apache.accumulo.examples.wikisearch.ingest; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.StringReader; import java.nio.charset.Charset; -import java.util.HashSet; -import java.util.IllegalFormatException; -import java.util.Map.Entry; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.accumulo.core.data.Mutation; -import org.apache.accumulo.core.data.Value; -import org.apache.accumulo.core.security.ColumnVisibility; import org.apache.accumulo.examples.wikisearch.ingest.ArticleExtractor.Article; import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit; -import org.apache.accumulo.examples.wikisearch.normalizer.LcNoDiacriticsNormalizer; -import org.apache.accumulo.examples.wikisearch.protobuf.Uid; -import org.apache.accumulo.examples.wikisearch.protobuf.Uid.List.Builder; -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.FileSplit; -import org.apache.log4j.Logger; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import org.apache.lucene.wikipedia.analysis.WikipediaTokenizer; - -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Multimap; public class WikipediaPartitioner extends Mapper<LongWritable,Text,Text,Article> { - private static final Logger log = Logger.getLogger(WikipediaPartitioner.class); + // private static final Logger log = Logger.getLogger(WikipediaPartitioner.class); public final static Charset UTF8 = Charset.forName("UTF-8"); public static final String DOCUMENT_COLUMN_FAMILY = "d"; http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/27fa06e3/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java ---------------------------------------------------------------------- diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java index d8c57c2..2738e2c 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/output/SortingRFileOutputFormat.java @@ -4,20 +4,18 @@ import java.io.IOException; import org.apache.accumulo.core.conf.AccumuloConfiguration; import org.apache.accumulo.core.data.Mutation; -import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.io.Text; -import org.apache.log4j.Logger; public class SortingRFileOutputFormat extends OutputFormat<Text,Mutation> { - private static final Logger log = Logger.getLogger(SortingRFileOutputFormat.class); + // private static final Logger log = Logger.getLogger(SortingRFileOutputFormat.class); public static final String PATH_NAME = "sortingrfileoutputformat.path"; public static final String MAX_BUFFER_SIZE = "sortingrfileoutputformat.max.buffer.size";