ACCUMULO-564 changes for 0.23 compile compatibility

git-svn-id: https://svn.apache.org/repos/asf/accumulo/trunk/examples/wikisearch@1332674 13f79535-47bb-0310-9956-ffa450edef68
(cherry picked from commit 201ebbadb38ba5f71e870ea28c6e0120a027c8e3)

Reason: Hadoop2 Compat
Author: Billie Rinaldi <bil...@apache.org>
Ref: ACCUMULO-1977

Project: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/commit/c9f213e9
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/tree/c9f213e9
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/diff/c9f213e9

Branch: refs/heads/master
Commit: c9f213e99defefce72146caf68f41ad0bf343aa7
Parents: 0c429f9
Author: Billie Rinaldi <bil...@apache.org>
Authored: Tue May 1 14:35:39 2012 +0000
Committer: Sean Busbey <bus...@clouderagovt.com>
Committed: Tue Dec 10 14:06:40 2013 -0600

----------------------------------------------------------------------
 .../wikisearch/ingest/WikipediaIngester.java    |  10 +-
 .../ingest/StandaloneStatusReporter.java        |  70 --------
 .../wikisearch/ingest/WikipediaMapperTest.java  | 163 -------------------
 .../logic/StandaloneStatusReporter.java         |   4 +
 .../wikisearch/logic/TestQueryLogic.java        |  31 ++--
 5 files changed, 28 insertions(+), 250 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/c9f213e9/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
----------------------------------------------------------------------
diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
index 50415a7..d4fa1c6 100644
--- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
+++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java
@@ -66,8 +66,8 @@ public class WikipediaIngester extends Configured implements Tool {
     System.exit(res);
   }
   
-  private void createTables(TableOperations tops, String tableName) throws AccumuloException, AccumuloSecurityException, TableNotFoundException,
-      TableExistsException {
+  public static void createTables(TableOperations tops, String tableName, boolean configureLocalityGroups) throws AccumuloException, AccumuloSecurityException,
+      TableNotFoundException, TableExistsException {
     // Create the shard table
     String indexTableName = tableName + "Index";
     String reverseIndexTableName = tableName + "ReverseIndex";
@@ -94,7 +94,9 @@ public class WikipediaIngester extends Configured implements Tool {
     }
     
     // Set the locality group for the full content column family
-    tops.setLocalityGroups(tableName, Collections.singletonMap("WikipediaDocuments", Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY))));
+    if (configureLocalityGroups)
+      tops.setLocalityGroups(tableName,
+          Collections.singletonMap("WikipediaDocuments", Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY))));
     
   }
   
@@ -143,7 +145,7 @@ public class WikipediaIngester extends Configured implements Tool {
     
     TableOperations tops = connector.tableOperations();
     
-    createTables(tops, tablename);
+    createTables(tops, tablename, true);
     
     configureJob(job);
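
For context on the change above: createTables used to be a private instance helper, so tests could not reuse it and had to hand-create each table themselves. Making it public static with a configureLocalityGroups flag lets callers running against a mock instance skip the locality-group step, which is what TestQueryLogic below now does. A minimal caller sketch, assuming a MockInstance; the class name, credentials, and "wiki" table name are illustrative only, not part of this commit:

    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.core.client.mock.MockInstance;
    import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
    
    public class CreateTablesSketch {
      public static void main(String[] args) throws Exception {
        Connector c = new MockInstance().getConnector("root", "");
        // false: skip tops.setLocalityGroups(...), which mock-backed tests do not need
        WikipediaIngester.createTables(c.tableOperations(), "wiki", false);
      }
    }
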
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/c9f213e9/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
deleted file mode 100644
index 6af1e9b..0000000
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/StandaloneStatusReporter.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.ingest;
-
-import org.apache.hadoop.mapreduce.Counter;
-import org.apache.hadoop.mapreduce.Counters;
-import org.apache.hadoop.mapreduce.StatusReporter;
-
-public class StandaloneStatusReporter extends StatusReporter {
-  
-  private Counters c = new Counters();
-  
-  private long filesProcessed = 0;
-  private long recordsProcessed = 0;
-  
-  public Counters getCounters() {
-    return c;
-  }
-  
-  @Override
-  public Counter getCounter(Enum<?> name) {
-    return c.findCounter(name);
-  }
-  
-  @Override
-  public Counter getCounter(String group, String name) {
-    return c.findCounter(group, name);
-  }
-  
-  @Override
-  public void progress() {
-    // do nothing
-  }
-  
-  @Override
-  public void setStatus(String status) {
-    // do nothing
-  }
-  
-  public long getFilesProcessed() {
-    return filesProcessed;
-  }
-  
-  public long getRecordsProcessed() {
-    return recordsProcessed;
-  }
-  
-  public void incrementFilesProcessed() {
-    filesProcessed++;
-    recordsProcessed = 0;
-  }
-  
-  public void incrementRecordsProcessed() {
-    recordsProcessed++;
-  }
-}
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/c9f213e9/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
----------------------------------------------------------------------
diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
deleted file mode 100644
index c659ec4..0000000
--- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapperTest.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.accumulo.examples.wikisearch.ingest;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URL;
-import java.util.HashMap;
-import java.util.Map.Entry;
-
-import junit.framework.Assert;
-
-import org.apache.accumulo.core.client.BatchWriter;
-import org.apache.accumulo.core.client.Connector;
-import org.apache.accumulo.core.client.MutationsRejectedException;
-import org.apache.accumulo.core.client.Scanner;
-import org.apache.accumulo.core.client.mock.MockInstance;
-import org.apache.accumulo.core.data.Key;
-import org.apache.accumulo.core.data.Mutation;
-import org.apache.accumulo.core.data.Range;
-import org.apache.accumulo.core.data.Value;
-import org.apache.accumulo.core.security.Authorizations;
-import org.apache.accumulo.core.util.ContextFactory;
-import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.RawLocalFileSystem;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.OutputCommitter;
-import org.apache.hadoop.mapreduce.RecordWriter;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.lib.input.FileSplit;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.junit.Before;
-
-/**
- * Load some data into mock accumulo
- */
-public class WikipediaMapperTest {
-  
-  private static final String METADATA_TABLE_NAME = "wikiMetadata";
-  
-  private static final String TABLE_NAME = "wiki";
-  
-  private static final String INDEX_TABLE_NAME = "wikiIndex";
-  
-  private static final String RINDEX_TABLE_NAME = "wikiReverseIndex";
-  
-  private class MockAccumuloRecordWriter extends RecordWriter<Text,Mutation> {
-    @Override
-    public void write(Text key, Mutation value) throws IOException, InterruptedException {
-      try {
-        writerMap.get(key).addMutation(value);
-      } catch (MutationsRejectedException e) {
-        throw new IOException("Error adding mutation", e);
-      }
-    }
-    
-    @Override
-    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
-      try {
-        for (BatchWriter w : writerMap.values()) {
-          w.flush();
-          w.close();
-        }
-      } catch (MutationsRejectedException e) {
-        throw new IOException("Error closing Batch Writer", e);
-      }
-    }
-    
-  }
-  
-  private Connector c = null;
-  private Configuration conf = new Configuration();
-  private HashMap<Text,BatchWriter> writerMap = new HashMap<Text,BatchWriter>();
-  
-  @Before
-  public void setup() throws Exception {
-    
-    conf.set(AggregatingRecordReader.START_TOKEN, "<page>");
-    conf.set(AggregatingRecordReader.END_TOKEN, "</page>");
-    conf.set(WikipediaConfiguration.TABLE_NAME, TABLE_NAME);
-    conf.set(WikipediaConfiguration.NUM_PARTITIONS, "1");
-    conf.set(WikipediaConfiguration.NUM_GROUPS, "1");
-    
-    MockInstance i = new MockInstance();
-    c = i.getConnector("root", "pass");
-    c.tableOperations().delete(METADATA_TABLE_NAME);
-    c.tableOperations().delete(TABLE_NAME);
-    c.tableOperations().delete(INDEX_TABLE_NAME);
-    c.tableOperations().delete(RINDEX_TABLE_NAME);
-    c.tableOperations().create(METADATA_TABLE_NAME);
-    c.tableOperations().create(TABLE_NAME);
-    c.tableOperations().create(INDEX_TABLE_NAME);
-    c.tableOperations().create(RINDEX_TABLE_NAME);
-    
-    writerMap.put(new Text(METADATA_TABLE_NAME), c.createBatchWriter(METADATA_TABLE_NAME, 1000L, 1000L, 1));
-    writerMap.put(new Text(TABLE_NAME), c.createBatchWriter(TABLE_NAME, 1000L, 1000L, 1));
-    writerMap.put(new Text(INDEX_TABLE_NAME), c.createBatchWriter(INDEX_TABLE_NAME, 1000L, 1000L, 1));
-    writerMap.put(new Text(RINDEX_TABLE_NAME), c.createBatchWriter(RINDEX_TABLE_NAME, 1000L, 1000L, 1));
-    
-    TaskAttemptContext context = ContextFactory.createTaskAttemptContext(conf);
-    
-    RawLocalFileSystem fs = new RawLocalFileSystem();
-    fs.setConf(conf);
-    
-    URL url = ClassLoader.getSystemResource("enwiki-20110901-001.xml");
-    Assert.assertNotNull(url);
-    File data = new File(url.toURI());
-    Path tmpFile = new Path(data.getAbsolutePath());
-    
-    // Setup the Mapper
-    InputSplit split = new FileSplit(tmpFile, 0, fs.pathToFile(tmpFile).length(), null);
-    AggregatingRecordReader rr = new AggregatingRecordReader();
-    Path ocPath = new Path(tmpFile, "oc");
-    OutputCommitter oc = new FileOutputCommitter(ocPath, context);
-    fs.deleteOnExit(ocPath);
-    StandaloneStatusReporter sr = new StandaloneStatusReporter();
-    rr.initialize(split, context);
-    MockAccumuloRecordWriter rw = new MockAccumuloRecordWriter();
-    WikipediaMapper mapper = new WikipediaMapper();
-    
-    // Load data into Mock Accumulo
-    Mapper<LongWritable,Text,Text,Mutation>.Context con = ContextFactory.createMapContext(mapper, context, rr, rw, oc, sr, split);
-    mapper.run(con);
-    
-    // Flush and close record writers.
-    rw.close(context);
-    
-  }
-  
-  private void debugQuery(String tableName) throws Exception {
-    Scanner s = c.createScanner(tableName, new Authorizations("all"));
-    Range r = new Range();
-    s.setRange(r);
-    for (Entry<Key,Value> entry : s)
-      System.out.println(entry.getKey().toString() + " " + entry.getValue().toString());
-  }
-  
-  public void testViewAllData() throws Exception {
-    debugQuery(METADATA_TABLE_NAME);
-    debugQuery(TABLE_NAME);
-    debugQuery(INDEX_TABLE_NAME);
-    debugQuery(RINDEX_TABLE_NAME);
-  }
-}

http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/c9f213e9/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/StandaloneStatusReporter.java
----------------------------------------------------------------------
diff --git a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/StandaloneStatusReporter.java b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/StandaloneStatusReporter.java
index 35743b3..a3b90d2 100644
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/StandaloneStatusReporter.java
+++ b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/StandaloneStatusReporter.java
@@ -67,4 +67,8 @@ public class StandaloneStatusReporter extends StatusReporter {
   public void incrementRecordsProcessed() {
     recordsProcessed++;
   }
+  
+  public float getProgress() {
+    return 0;
+  }
 }
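
The four-line getProgress() addition above is the substance of the 0.23 compatibility fix: in the Hadoop 0.23-era mapreduce API, org.apache.hadoop.mapreduce.StatusReporter declares getProgress() as an abstract method, so a concrete subclass without it no longer compiles. Omitting @Override keeps the same source compiling against 0.20, where the parent has no such method. A self-contained sketch of the minimal subclass the newer API accepts (the class name and counter plumbing here are illustrative, not from this repo):

    import org.apache.hadoop.mapreduce.Counter;
    import org.apache.hadoop.mapreduce.Counters;
    import org.apache.hadoop.mapreduce.StatusReporter;
    
    // Every member below is required by the 0.23+ abstract parent class.
    public class MinimalReporter extends StatusReporter {
      private final Counters counters = new Counters();
      
      public Counter getCounter(Enum<?> name) {
        return counters.findCounter(name);
      }
      
      public Counter getCounter(String group, String name) {
        return counters.findCounter(group, name);
      }
      
      public void progress() {
        // no-op: there is no live task to notify
      }
      
      public void setStatus(String status) {
        // no-op
      }
      
      public float getProgress() {
        return 0; // new in 0.23; a standalone test reporter has no real progress
      }
    }
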
http://git-wip-us.apache.org/repos/asf/accumulo-wikisearch/blob/c9f213e9/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
----------------------------------------------------------------------
diff --git a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
index 938f01b..9079d79 100644
--- a/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
+++ b/query/src/test/java/org/apache/accumulo/examples/wikisearch/logic/TestQueryLogic.java
@@ -16,6 +16,8 @@
  */
 package org.apache.accumulo.examples.wikisearch.logic;
 
+import static org.junit.Assert.assertEquals;
+
 import java.io.File;
 import java.io.IOException;
 import java.net.URL;
@@ -38,12 +40,12 @@ import org.apache.accumulo.core.data.Value;
 import org.apache.accumulo.core.security.Authorizations;
 import org.apache.accumulo.core.util.ContextFactory;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration;
+import org.apache.accumulo.examples.wikisearch.ingest.WikipediaIngester;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit;
 import org.apache.accumulo.examples.wikisearch.ingest.WikipediaMapper;
 import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator;
 import org.apache.accumulo.examples.wikisearch.reader.AggregatingRecordReader;
 import org.apache.accumulo.examples.wikisearch.sample.Document;
-import org.apache.accumulo.examples.wikisearch.sample.Field;
 import org.apache.accumulo.examples.wikisearch.sample.Results;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -117,11 +119,8 @@ public class TestQueryLogic {
     
     MockInstance i = new MockInstance();
     c = i.getConnector("root", "");
+    WikipediaIngester.createTables(c.tableOperations(), TABLE_NAME, false);
     for (String table : TABLE_NAMES) {
-      try {
-        c.tableOperations().delete(table);
-      } catch (Exception ex) {}
-      c.tableOperations().create(table);
       writerMap.put(new Text(table), c.createBatchWriter(table, 1000L, 1000L, 1));
     }
     
@@ -162,7 +161,7 @@
   }
   
   void debugQuery(String tableName) throws Exception {
-    Scanner s = c.createScanner(tableName, new Authorizations());
+    Scanner s = c.createScanner(tableName, new Authorizations("all"));
     Range r = new Range();
     s.setRange(r);
     for (Entry<Key,Value> entry : s)
@@ -170,17 +169,23 @@
   }
   
   @Test
-  public void testTitle() {
+  public void testTitle() throws Exception {
     Logger.getLogger(AbstractQueryLogic.class).setLevel(Level.OFF);
     Logger.getLogger(RangeCalculator.class).setLevel(Level.OFF);
     List<String> auths = new ArrayList<String>();
     auths.add("enwiki");
-    Results results = table.runQuery(c, auths, "TITLE == 'afghanistanhistory'", null, null, null);
-    for (Document doc : results.getResults()) {
-      System.out.println("id: " + doc.getId());
-      for (Field field : doc.getFields())
-        System.out.println(field.getFieldName() + " -> " + field.getFieldValue());
-    }
+    
+    Results results = table.runQuery(c, auths, "TITLE == 'asphalt' or TITLE == 'abacus' or TITLE == 'acid' or TITLE == 'acronym'", null, null, null);
+    List<Document> docs = results.getResults();
+    assertEquals(4, docs.size());
+    
+    /*
+     * debugQuery(METADATA_TABLE_NAME); debugQuery(TABLE_NAME); debugQuery(INDEX_TABLE_NAME); debugQuery(RINDEX_TABLE_NAME);
+     * 
+     * results = table.runQuery(c, auths, "TEXT == 'abacus'", null, null, null); docs = results.getResults(); assertEquals(4, docs.size()); for (Document doc :
+     * docs) { System.out.println("id: " + doc.getId()); for (Field field : doc.getFields()) System.out.println(field.getFieldName() + " -> " +
+     * field.getFieldValue()); }
+     */
   }
 }
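
One behavioral note on the debugQuery change above: a Scanner created with new Authorizations() carries no authorization labels, so any entry stored with a column visibility expression is filtered out of the scan and the debug dump prints nothing. Passing "all", which the switch here suggests matches the visibility label the test data is written under, makes the dump show the ingested entries. A small self-contained helper illustrating the corrected pattern (the class and method names are illustrative):

    import java.util.Map.Entry;
    
    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.core.client.Scanner;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.security.Authorizations;
    
    public class TableDumpSketch {
      // With new Authorizations() instead of "all", visibility-labeled
      // entries would be silently filtered and nothing would print.
      static void dump(Connector c, String tableName) throws Exception {
        Scanner s = c.createScanner(tableName, new Authorizations("all"));
        s.setRange(new Range());
        for (Entry<Key,Value> e : s)
          System.out.println(e.getKey() + " " + e.getValue());
      }
    }
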