码迷,mamicode.com
首页 > 其他好文 > 详细

hadoop学习笔记(七)——hadoop权威指南中天气数据运行

时间:2015-07-31 23:34:18      阅读:136      评论:0      收藏:0      [点我收藏+]

标签:hadoop

1) hdfs文件系统准备工作
a) # hadoop fs -ls /user/root #查看hdfs文件系统
b) # hadoop fs -rm /user/root/output02/part-r-00000
c) 删除文档,删除文件夹
d) # hadoop fs -rm -r /user/root/output02
e) # hadoop fs -mkdir -p input/ncdc
f) 解压缩输入文件,hadoop无法识别.zip或者.rar
g) # hadoop fs -put ../input_tmp/* input/ncdc
h) 同一个任务对应的input文件内容格式必须一致。
2) 编写程序
a) MaxTemperature.java

// cc MaxTemperature Application to find the maximum temperature in the weather dataset
// vv MaxTemperature
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver for the maximum-temperature MapReduce job.
 *
 * <p>Expects exactly two command-line arguments: the input path and the
 * output path. Configures a job that uses {@code MaxTemperatureMapper} and
 * {@code MaxTemperatureReducer} and emits {@code Text} keys with
 * {@code IntWritable} values.
 */
public class MaxTemperature {

  /**
   * Configures and runs the job, then terminates the JVM.
   *
   * @param args {@code args[0]} is the input path, {@code args[1]} the output path
   * @throws Exception if job setup or execution fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("Usage: MaxTemperature <input path> <output path>");
      System.exit(-1);
    }

    // Job.getInstance() is the supported factory; the no-arg Job constructor
    // is deprecated in Hadoop 2.x.
    Job job = Job.getInstance();
    job.setJarByClass(MaxTemperature.class);
    job.setJobName("Max temperature");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Block until the job finishes; exit status 0 on success, 1 on failure.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
// ^^ MaxTemperature

b) MaxTemperatureMapper.java

// cc MaxTemperatureMapper Mapper for maximum temperature example
// vv MaxTemperatureMapper
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MaxTemperatureMapper
  extends Mapper<LongWritable, Text, Text, IntWritable> {

  private static final int MISSING = 9999;

  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {

    String line = value.toString();
    String year = line.substring(15, 19);
    int airTemperature;
    if (line.charAt(87) == ‘+‘) { // parseInt doesn‘t like leading plus signs
      airTemperature = Integer.parseInt(line.substring(88, 92));
    } else {
      airTemperature = Integer.parseInt(line.substring(87, 92));
    }
    String quality = line.substring(92, 93);
    if (airTemperature != MISSING && quality.matches("[01459]")) {
      context.write(new Text(year), new IntWritable(airTemperature));
    }
  }
}
// ^^ MaxTemperatureMapper

c) MaxTemperatureReducer.java

// cc MaxTemperatureReducer Reducer for maximum temperature example
// vv MaxTemperatureReducer
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer for the maximum-temperature example: for each key it writes the
 * largest of the integer values grouped under that key.
 */
public class MaxTemperatureReducer
  extends Reducer<Text, IntWritable, Text, IntWritable> {

  /**
   * Scans all values for {@code key} and emits the largest one.
   *
   * @param key     the grouping key, written back unchanged
   * @param values  the integer values collected for this key
   * @param context sink for the emitted {@code (key, maximum)} pair
   * @throws IOException          if writing to the context fails
   * @throws InterruptedException if the write is interrupted
   */
  @Override
  public void reduce(Text key, Iterable<IntWritable> values,
      Context context)
      throws IOException, InterruptedException {

    // Start from the smallest int so any value replaces it.
    int highest = Integer.MIN_VALUE;
    for (IntWritable reading : values) {
      int current = reading.get();
      if (current > highest) {
        highest = current;
      }
    }
    IntWritable result = new IntWritable(highest);
    context.write(key, result);
  }
}
// ^^ MaxTemperatureReducer

3) eclipse上运行
a) 创建包org.hadoop.ncdc,编译上述3个Java类(每个源文件首行需加上 package org.hadoop.ncdc; 声明)。
b) run as → Java application → arguments
hdfs://master:9000/user/root/input/ncdc hdfs://master:9000/user/root/output/ncdc
c) # hadoop fs -ls output/ncdc
d) # hadoop fs -cat output/ncdc/part-r-00000
4) javac方式执行
a) vi classpath.sh添加

# Builds the CLASSPATH used to compile and run the MapReduce examples by hand.
export HADOOP_HOME=/usr/local/hadoop2.5
export CLASSPATH=.:/usr/local/jdk1.7/lib:/usr/local/jdk1.7/jre/lib

# Append every hadoop-*.jar shipped with each Hadoop module.
for module in common hdfs mapreduce yarn; do
     for f in "$HADOOP_HOME"/share/hadoop/$module/hadoop-*.jar; do
          # Skip the literal pattern when a module directory has no matching jar.
          [ -e "$f" ] || continue
          export CLASSPATH="$CLASSPATH:$f"
     done
done

# Append the modules' third-party dependency jars. The lib directories live
# under share/hadoop/, and the quoted "dir/*" wildcard is expanded by the JVM
# to every jar in that directory (a bare directory entry only exposes .class files).
export CLASSPATH="$CLASSPATH:$HADOOP_HOME/share/hadoop/common/lib/*:$HADOOP_HOME/share/hadoop/hdfs/lib/*:$HADOOP_HOME/share/hadoop/mapreduce/lib/*:$HADOOP_HOME/share/hadoop/tools/lib/*:$HADOOP_HOME/share/hadoop/yarn/lib/*"

b) cat /usr/local/hadoop2.5/classpath.sh>> /etc/profile
c) cat /usr/local/hadoop2.5/classpath.sh >> /usr/local/hadoop2.5/etc/hadoop/hadoop-env.sh
d) source /etc/profile
e) 重启hadoop服务
f) # javac MaxTemperatureMapper.java -d .
其他类也一样,注意先编译最底层类,编译完成的class文件在Java程序的package路径下
g) # jar -cvf MaxTemperature.jar org #打成jar包
h) # jar -tvf MaxTemperature.jar #查看jar包目录结构
i) # hadoop jar MaxTemperature.jar org/hadoop/ncdc/MaxTemperature input/ncdc output/ncdc #运行jar包
hadoop jar 包名 程序主类名 输入文件夹 输出文件夹
j) # hadoop fs -ls output/ncdc
k) # hadoop fs -cat output/ncdc/part-r-00000
5) eclipse输出jar包运行
a) 右键export→jar→生成jar包
由于跨系统,故无需在打jar包过程中附加.classpath
b) 因已经配置了classpath,Linux上运行jar包即可。

版权声明:本文为博主原创文章,未经博主允许不得转载。

hadoop学习笔记(七)——hadoop权威指南中天气数据运行

标签:hadoop

原文地址:http://blog.csdn.net/thinkpadshi/article/details/47177119

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!