[hadoop]Windows下eclipse导入hadoop源码，编译WordCount

时间：2014-08-14 20:28:59 阅读：704 评论：0 收藏：0 [点我收藏+]

hadoop版本为hadoop1.2.1

eclipse版本为eclipse-standard-kepler-SR2-win32-x86_64

WordCount.java为hadoop-1.2.1\src\examples\org\apache\hadoop\examples\WordCount.java

 1 /**
 2  *  Licensed under the Apache License, Version 2.0 (the "License");
 3  *  you may not use this file except in compliance with the License.
 4  *  You may obtain a copy of the License at
 5  *
 6  *      http://www.apache.org/licenses/LICENSE-2.0
 7  *
 8  *  Unless required by applicable law or agreed to in writing, software
 9  *  distributed under the License is distributed on an "AS IS" BASIS,
10  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  *  See the License for the specific language governing permissions and
12  *  limitations under the License.
13  */
14 
15 
16 package org.apache.hadoop.examples;
17 
18 import java.io.IOException;
19 import java.util.StringTokenizer;
20 
21 import org.apache.hadoop.conf.Configuration;
22 import org.apache.hadoop.fs.Path;
23 import org.apache.hadoop.io.IntWritable;
24 import org.apache.hadoop.io.Text;
25 import org.apache.hadoop.mapreduce.Job;
26 import org.apache.hadoop.mapreduce.Mapper;
27 import org.apache.hadoop.mapreduce.Reducer;
28 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
29 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
30 import org.apache.hadoop.util.GenericOptionsParser;
31 
32 public class WordCount {
33 
34   public static class TokenizerMapper 
35        extends Mapper<Object, Text, Text, IntWritable>{
36     
37     private final static IntWritable one = new IntWritable(1);
38     private Text word = new Text();
39       
40     public void map(Object key, Text value, Context context
41                     ) throws IOException, InterruptedException {
42       StringTokenizer itr = new StringTokenizer(value.toString());
43       while (itr.hasMoreTokens()) {
44         word.set(itr.nextToken());
45         context.write(word, one);
46       }
47     }
48   }
49   
50   public static class IntSumReducer 
51        extends Reducer<Text,IntWritable,Text,IntWritable> {
52     private IntWritable result = new IntWritable();
53 
54     public void reduce(Text key, Iterable<IntWritable> values, 
55                        Context context
56                        ) throws IOException, InterruptedException {
57       int sum = 0;
58       for (IntWritable val : values) {
59         sum += val.get();
60       }
61       result.set(sum);
62       context.write(key, result);
63     }
64   }
65 
66   public static void main(String[] args) throws Exception {
67     Configuration conf = new Configuration();
68     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
69     if (otherArgs.length != 2) {
70       System.err.println("Usage: wordcount <in> <out>");
71       System.exit(2);
72     }
73     Job job = new Job(conf, "word count");
74     job.setJarByClass(WordCount.class);
75     job.setMapperClass(TokenizerMapper.class);
76     job.setCombinerClass(IntSumReducer.class);
77     job.setReducerClass(IntSumReducer.class);
78     job.setOutputKeyClass(Text.class);
79     job.setOutputValueClass(IntWritable.class);
80     FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
81     FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
82     System.exit(job.waitForCompletion(true) ? 0 : 1);
83   }
84 }

View Code

在eclipse中新建java project，project名为WordCount

在project中新建class，类名为WordCount

再将上述代码覆盖eclipse中的WordCount.java

并将页首的package改了wordcount,改后的源码如下

 1 package wordcount;
 2 
 3 import java.io.IOException;
 4 import java.util.StringTokenizer;
 5 
 6 import org.apache.hadoop.conf.Configuration;
 7 import org.apache.hadoop.fs.Path;
 8 import org.apache.hadoop.io.IntWritable;
 9 import org.apache.hadoop.io.Text;
10 import org.apache.hadoop.mapreduce.Job;
11 import org.apache.hadoop.mapreduce.Mapper;
12 import org.apache.hadoop.mapreduce.Reducer;
13 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
14 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
15 import org.apache.hadoop.util.GenericOptionsParser;
16 
17 public class WordCount {
18 
19   public static class TokenizerMapper 
20        extends Mapper<Object, Text, Text, IntWritable>{
21     
22     private final static IntWritable one = new IntWritable(1);
23     private Text word = new Text();
24       
25     public void map(Object key, Text value, Context context
26                     ) throws IOException, InterruptedException {
27       StringTokenizer itr = new StringTokenizer(value.toString());
28       while (itr.hasMoreTokens()) {
29         word.set(itr.nextToken());
30         context.write(word, one);
31       }
32     }
33   }
34   
35   public static class IntSumReducer 
36        extends Reducer<Text,IntWritable,Text,IntWritable> {
37     private IntWritable result = new IntWritable();
38 
39     public void reduce(Text key, Iterable<IntWritable> values, 
40                        Context context
41                        ) throws IOException, InterruptedException {
42       int sum = 0;
43       for (IntWritable val : values) {
44         sum += val.get();
45       }
46       result.set(sum);
47       context.write(key, result);
48     }
49   }
50 
51   public static void main(String[] args) throws Exception {
52     Configuration conf = new Configuration();
53     String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
54     if (otherArgs.length != 2) {
55       System.err.println("Usage: wordcount <in> <out>");
56       System.exit(2);
57     }
58     Job job = new Job(conf, "word count");
59     job.setJarByClass(WordCount.class);
60     job.setMapperClass(TokenizerMapper.class);
61     job.setCombinerClass(IntSumReducer.class);
62     job.setReducerClass(IntSumReducer.class);
63     job.setOutputKeyClass(Text.class);
64     job.setOutputValueClass(IntWritable.class);
65     FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
66     FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
67     System.exit(job.waitForCompletion(true) ? 0 : 1);
68   
69   }
70 }

View Code

 1 import org.apache.hadoop.conf.Configuration;
 2 import org.apache.hadoop.fs.Path;
 3 import org.apache.hadoop.io.IntWritable;
 4 import org.apache.hadoop.io.Text;
 5 import org.apache.hadoop.mapreduce.Job;
 6 import org.apache.hadoop.mapreduce.Mapper;
 7 import org.apache.hadoop.mapreduce.Reducer;
 8 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 9 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
10 import org.apache.hadoop.util.GenericOptionsParser;

可以看到源码import了好几个hadoop自定义类，非JDK环境自带的类，所以需要把这些类导入eclipse中，不然编译器如何能找到这些类呢，得明确让编译器知道这些类所在位置。

方法一、直接把这些类找出，然后拷贝到当前WordCount文件夹中src文件夹当中

方法二、将这些类所在路径添加到src中，具体如下操作，右击WordCount，点击ProPerties

bubuko.com,布布扣

再在Source使用Link Source方法添加mapred，hdfs，core，tools这四个目录，因为hadoop中的源文件就在这四个目录当中。

bubuko.com,布布扣

已经把import中的类都添加到，会发现WordCount下多了四个目录

bubuko.com,布布扣

这时候编译并运行一下，会发现有如下错误

Exception in thread "main" java.lang.Error: Unresolved compilation problems: 
    The import org.apache.commons cannot be resolved
    The import org.apache.commons cannot be resolved
    The import org.codehaus cannot be resolved
    The import org.codehaus cannot be resolved
    Log cannot be resolved to a type
    LogFactory cannot be resolved
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    Log cannot be resolved to a type
    JsonFactory cannot be resolved to a type
    JsonFactory cannot be resolved to a type
    JsonGenerator cannot be resolved to a type

    at org.apache.hadoop.conf.Configuration.<init>(Configuration.java:60)
    at wordcount.WordCount.main(WordCount.java:52)

原因是缺少依赖的jar库文件，再把缺少的jar库文件添加入库即可。

使用Add External JARs添加hadoop1.2.1\lib目录下所有jar文件。

bubuko.com,布布扣