标签:hadoop toolrunner
为了简化命令行方式运行作业,Hadoop自带了一些辅助类。GenericOptionsParser是一个类,用来解释常用的Hadoop命令行选项,并根据需要,为Configuration对象设置相应的取值。通常不直接使用GenericOptionsParser,更方便的方式是:实现Tool接口,通过ToolRunner来运行应用程序,ToolRunner内部调用GenericOptionsParser。
A utility to help run Tools.
ToolRunner can be used to run classes implementing Tool interface. It works in conjunction with GenericOptionsParser to parse the generic hadoop command line arguments and modifies the Configuration of the Tool. The application-specific options are passed along without being modified.
public static int run(Configuration conf, Tool tool, String[] args) throws Exception
public static int run(Tool tool, String[] args) throws Exception
public static void printGenericCommandUsage(PrintStream out)
Unless explicitly turned off, Hadoop by default specifies two resources, loaded in-order from the classpath:
package org.jediael.hadoopdemo.toolrunnerdemo; import java.util.Map.Entry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class ToolRunnerDemo extends Configured implements Tool { static { //Configuration.addDefaultResource("hdfs-default.xml"); //Configuration.addDefaultResource("hdfs-site.xml"); //Configuration.addDefaultResource("mapred-default.xml"); //Configuration.addDefaultResource("mapred-site.xml"); } @Override public int run(String[] args) throws Exception { Configuration conf = getConf(); for (Entry<String, String> entry : conf) { System.out.printf("%s=%s\n", entry.getKey(), entry.getValue()); } return 0; } public static void main(String[] args) throws Exception { int exitCode = ToolRunner.run(new ToolRunnerDemo(), args); System.exit(exitCode); } }
package org.jediael.hadoopdemo.toolrunnerdemo; import java.io.IOException; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class WordCount extends Configured implements Tool{ public static class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> { private final IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); StringTokenizer token = new StringTokenizer(line); while (token.hasMoreTokens()) { word.set(token.nextToken()); context.write(word, one); } } } public static class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable val : values) { sum += val.get(); } context.write(key, new IntWritable(sum)); } } @Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf); job.setJarByClass(WordCount.class); job.setJobName("wordcount"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(WordCountMap.class); job.setReducerClass(WordCountReduce.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return(job.waitForCompletion(true)?0:-1); } public static void main(String[] args) throws Exception { int exitCode = ToolRunner.run(new WordCount(), args); System.exit(exitCode); } }运行程序:
[root@jediael project]# hadoop fs -mkdir wcin2 [root@jediael project]# hadoop fs -copyFromLocal /opt/jediael/apache-nutch-2.2.1/CHANGES.txt wcin2 [root@jediael project]# hadoop jar wordcount2.jar org.jediael.hadoopdemo.toolrunnerdemo.WordCount wcin2 wcout2
标签:hadoop toolrunner
原文地址:http://blog.csdn.net/jediael_lu/article/details/38751885