Updated Branches: refs/heads/master b64149d78 -> c8999e693
ACCUMULO-1458 - Sean Hickey's patch for sample using Password token Signed-off-by: John Vines <jvi...@gmail.com> Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/c8999e69 Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/c8999e69 Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/c8999e69 Branch: refs/heads/master Commit: c8999e6937ad0af8de4ba928ebab33c55e3d0567 Parents: b64149d Author: John Vines <jvi...@gmail.com> Authored: Thu Oct 31 17:39:16 2013 -0400 Committer: John Vines <jvi...@gmail.com> Committed: Fri Nov 1 18:28:31 2013 -0400 ---------------------------------------------------------------------- .../simple/mapreduce/TokenFileWordCount.java | 97 ++++++++++++++++++++ .../main/resources/docs/examples/README.mapred | 59 +++++++++++- 2 files changed, 155 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8999e69/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java ---------------------------------------------------------------------- diff --git a/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java new file mode 100644 index 0000000..15a7765 --- /dev/null +++ b/examples/simple/src/main/java/org/apache/accumulo/examples/simple/mapreduce/TokenFileWordCount.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.accumulo.examples.simple.mapreduce; + +import java.io.IOException; + +import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat; +import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.Value; +import org.apache.accumulo.core.util.CachedConfiguration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +/** + * A simple map reduce job that inserts word counts into accumulo. See the README for instructions on how to run this. This version does not use the ClientOpts + * class to parse arguments as an example of using AccumuloInputFormat and AccumuloOutputFormat directly. See README.mapred for more details. + * + */ +public class TokenFileWordCount extends Configured implements Tool { + + public static class MapClass extends Mapper<LongWritable,Text,Text,Mutation> { + @Override + public void map(LongWritable key, Text value, Context output) throws IOException { + String[] words = value.toString().split("\\s+"); + + for (String word : words) { + + Mutation mutation = new Mutation(new Text(word)); + mutation.put(new Text("count"), new Text("20080906"), new Value("1".getBytes())); + + try { + output.write(null, mutation); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + } + + public int run(String[] args) throws Exception { + + String instance = args[0]; + String zookeepers = args[1]; + String user = args[2]; + String tokenFile = args[3]; + String input = args[4]; + String tableName = args[5]; + + Job job = new Job(getConf(), TokenFileWordCount.class.getName()); + job.setJarByClass(this.getClass()); + + job.setInputFormatClass(TextInputFormat.class); + TextInputFormat.setInputPaths(job, input); + + job.setMapperClass(MapClass.class); + + job.setNumReduceTasks(0); + + job.setOutputFormatClass(AccumuloOutputFormat.class); + job.setOutputKeyClass(Text.class); + job.setOutputValueClass(Mutation.class); + + // AccumuloInputFormat not used here, but it uses the same functions. + AccumuloOutputFormat.setZooKeeperInstance(job, instance, zookeepers); + AccumuloOutputFormat.setConnectorInfo(job, user, tokenFile); + AccumuloOutputFormat.setCreateTables(job, true); + AccumuloOutputFormat.setDefaultTableName(job, tableName); + + job.waitForCompletion(true); + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(CachedConfiguration.getInstance(), new TokenFileWordCount(), args); + System.exit(res); + } +} http://git-wip-us.apache.org/repos/asf/accumulo/blob/c8999e69/server/monitor/src/main/resources/docs/examples/README.mapred ---------------------------------------------------------------------- diff --git a/server/monitor/src/main/resources/docs/examples/README.mapred b/server/monitor/src/main/resources/docs/examples/README.mapred index 4acd306..b98140f 100644 --- a/server/monitor/src/main/resources/docs/examples/README.mapred +++ b/server/monitor/src/main/resources/docs/examples/README.mapred @@ -50,7 +50,7 @@ for the column family count. After creating the table, run the word count map reduce job. - $ bin/tool.sh lib/accumulo-examples-simple.jar org.apache.accumulo.examples.simple.mapreduce.WordCount -i instance -z zookeepers --input /user/username/wc wordCount -u username -p password + $ bin/tool.sh lib/accumulo-examples-simple.jar org.apache.accumulo.examples.simple.mapreduce.WordCount -i instance -z zookeepers --input /user/username/wc -t wordCount -u username -p password 11/02/07 18:20:11 INFO input.FileInputFormat: Total input paths to process : 1 11/02/07 18:20:12 INFO mapred.JobClient: Running job: job_201102071740_0003 @@ -95,3 +95,60 @@ org.apache.accumulo.examples.simple.mapreduce.UniqueColumns. This example computes the unique set of columns in a table and shows how a map reduce job can directly read a tables files from HDFS. +One more example available is +org.apache.accumulo.examples.simple.mapreduce.TokenFileWordCount. +The TokenFileWordCount example works exactly the same as the WordCount example +explained above except that it uses a token file rather than giving the +password directly to the map-reduce job (this avoids having the password +displayed in the job's configuration which is world-readable). + +To create a token file, use the create-token utility + + $ ./bin/accumulo create-token + +It defaults to creating a PasswordToken, but you can specify the token class +with -tc (requires the fully qualified class name). Based on the token class, +it will prompt you for each property required to create the token. + +The last value it prompts for is a local filename to save to. If this file +exists, it will append the new token to the end. Multiple tokens can exist in +a file, but only the first one for each user will be recognized. + +Rather than waiting for the prompts, you can specify some options when calling +create-token, for example + + $ ./bin/accumulo create-token -u root -p secret -f root.pw + +would create a token file containing a PasswordToken for +user 'root' with password 'secret' and saved to 'root.pw' + +This local file needs to be uploaded to hdfs to be used with the +map-reduce job. For example, if the file were 'root.pw' in the local directory: + + $ hadoop fs -put root.pw root.pw + +This would put 'root.pw' in the user's home directory in hdfs. + +Because the basic WordCount example uses Opts to parse its arguments +(which extends ClientOnRequiredTable), you can use a token file with +the basic WordCount example by calling the same command as explained above +except replacing the password with the token file (rather than -p, use -tf). + + $ ./bin/tool.sh lib/accumulo-examples-simple.jar org.apache.accumulo.examples.simple.mapreduce.WordCount -i instance -z zookeepers --input /user/username/wc -t wordCount -u username -tf tokenfile + +In the above examples, username was 'root' and tokenfile was 'root.pw' + +However, if you don't want to use the Opts class to parse arguments, +the TokenFileWordCount is an example of using the token file manually. + + $ bin/tool.sh lib/accumulo-examples-simple.jar org.apache.accumulo.examples.simple.mapreduce.TokenFileWordCount instance zookeepers username tokenfile /user/username/wc wordCount + +The results should be the same as the WordCount example except that the +authentication token was not stored in the configuration. It was instead +stored in a file that the map-reduce job pulled into the distributed cache. +(If you ran either of these on the same table right after the +WordCount example, then the resulting counts should just double.) + + + +