标签:hadoop
1) hdfs文件系统准备工作
a) # hadoop fs -ls /user/root #查看hdfs文件系统
b) # hadoop fs -rm /user/root/output02/part-r-00000
c) 删除文档,删除文件夹
d) # hadoop fs -rm -r /user/root/output02
e) # hadoop fs -mkdir -p input/ncdc
f) 解压缩输入文件,hadoop无法识别.zip或者.rar
g) # hadoop fs -put ../input_tmp/* input/ncdc
h) 同一个任务对应的input文件内容格式必须一致。
2) 编写程序
a) MaxTemperature.java
// cc MaxTemperature Application to find the maximum temperature in the weather dataset
// vv MaxTemperature
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver for the maximum-temperature example: configures a MapReduce job that
 * uses {@code MaxTemperatureMapper} and {@code MaxTemperatureReducer}, submits
 * it, and exits with a status reflecting whether the job succeeded.
 */
public class MaxTemperature {

  /**
   * Configures and runs the job.
   *
   * @param args exactly two entries: the input path and the output path
   * @throws Exception if the job cannot be created, submitted, or monitored
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("Usage: MaxTemperature <input path> <output path>");
      System.exit(-1);
    }

    // The public Job constructors are deprecated in Hadoop 2.x; the static
    // factory is the supported way to obtain a Job with a default Configuration.
    Job job = Job.getInstance();
    // Lets Hadoop locate the jar to ship by looking up the jar containing this class.
    job.setJarByClass(MaxTemperature.class);
    job.setJobName("Max temperature");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // waitForCompletion(true) blocks until the job finishes and reports progress.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
// ^^ MaxTemperature
b) MaxTemperatureMapper.java
// cc MaxTemperatureMapper Mapper for maximum temperature example
// vv MaxTemperatureMapper
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class MaxTemperatureMapper
extends Mapper<LongWritable, Text, Text, IntWritable> {
private static final int MISSING = 9999;
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
String year = line.substring(15, 19);
int airTemperature;
if (line.charAt(87) == ‘+‘) { // parseInt doesn‘t like leading plus signs
airTemperature = Integer.parseInt(line.substring(88, 92));
} else {
airTemperature = Integer.parseInt(line.substring(87, 92));
}
String quality = line.substring(92, 93);
if (airTemperature != MISSING && quality.matches("[01459]")) {
context.write(new Text(year), new IntWritable(airTemperature));
}
}
}
// ^^ MaxTemperatureMapper
c) MaxTemperatureReducer.java
// cc MaxTemperatureReducer Reducer for maximum temperature example
// vv MaxTemperatureReducer
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer for the maximum-temperature example: collapses all temperature
 * readings that share a key into the single largest reading.
 */
public class MaxTemperatureReducer
    extends Reducer<Text, IntWritable, Text, IntWritable> {

  /**
   * Writes the largest value seen for {@code key}.
   *
   * @param key     the grouping key passed through unchanged to the output
   * @param values  all readings emitted for this key
   * @param context sink for the {@code (key, maximum)} pair
   * @throws IOException          if writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    // Start from the smallest int so that any actual reading replaces it.
    int highest = Integer.MIN_VALUE;
    for (IntWritable reading : values) {
      int candidate = reading.get();
      if (candidate > highest) {
        highest = candidate;
      }
    }
    IntWritable result = new IntWritable(highest);
    context.write(key, result);
  }
}
// ^^ MaxTemperatureReducer
3) eclipse上运行
a) 创建包org.hadoop.ncdc,在上述3个Java类的文件开头加上 package org.hadoop.ncdc; 声明后编译。
b) run as → Java application → arguments
hdfs://master:9000/user/root/input/ncdc hdfs://master:9000/user/root/output/ncdc
c) # hadoop fs -ls output/ncdc
d) # hadoop fs -cat output/ncdc/part-r-00000
4) javac方式执行
a) vi classpath.sh添加
# Builds CLASSPATH for compiling and running Hadoop programs with plain javac/java.
export HADOOP_HOME=/usr/local/hadoop2.5
CLASSPATH=.:/usr/local/jdk1.7/lib:/usr/local/jdk1.7/jre/lib

# Add every hadoop-*.jar that sits directly in each module directory.
for module in common hdfs mapreduce yarn; do
  for f in "$HADOOP_HOME"/share/hadoop/"$module"/hadoop-*.jar; do
    # An unmatched glob stays as the literal pattern; skip it instead of adding it.
    [ -e "$f" ] || continue
    CLASSPATH="$CLASSPATH:$f"
  done
done

# Add the third-party jars in each module's lib directory. These live under
# share/hadoop/<module>/lib, the same tree the loop above reads. A bare directory
# entry only exposes loose .class files, so a "dir/*" wildcard entry is used;
# it is quoted so the shell leaves it alone and the JVM expands it to the jars.
for module in common hdfs mapreduce tools yarn; do
  CLASSPATH="$CLASSPATH:$HADOOP_HOME/share/hadoop/$module/lib/*"
done

export CLASSPATH
b) cat /usr/local/hadoop2.5/classpath.sh>> /etc/profile
c) cat /usr/local/hadoop2.5/classpath.sh >> /usr/local/hadoop2.5/etc/hadoop/hadoop-env.sh
d) source /etc/profile
e) 重启hadoop服务
f) # javac MaxTemperatureMapper.java -d .
其他类也一样,注意先编译最底层类,编译完成的class文件在Java程序的package路径下
g) # jar -cvf MaxTemperature.jar org #打成jar包
h) # jar -tvf MaxTemperature.jar #查看jar包目录结构
i) # hadoop jar MaxTemperature.jar org/hadoop/ncdc/MaxTemperature input/ncdc output/ncdc #运行jar包
hadoop jar 包名 程序主类名 输入文件夹 输出文件夹
j) # hadoop fs -ls output/ncdc
k) # hadoop fs -cat output/ncdc/part-r-00000
5) eclipse输出jar包运行
a) 右键export→jar→生成jar包
由于跨系统,故无需在打jar包过程中附加.classpath
b) 因已经配置了classpath,Linux上运行jar包即可。
版权声明:本文为博主原创文章,未经博主允许不得转载。
hadoop学习笔记(七)——hadoop权威指南中天气数据运行
标签:hadoop
原文地址:http://blog.csdn.net/thinkpadshi/article/details/47177119