如何进行单元测试

时间：2015-12-26 14:55:59 阅读：301 评论：0 收藏：0 [点我收藏+]

标签：

　　Hadoop的MapReduce程序提交到集群环境中运行，出问题时定位非常麻烦，有时需要一遍遍修改代码和打印日志来排查问题，哪怕是比较小的问题。如果数据量很大的话调试起来就相当耗费时间。而且，Map和Reduce的一些参数是Hadoop框架在运行时传入的，比如Context、InputSplit，这进一步增加了调试的难度。如果有一个良好的单元测试框架能帮助尽早发现、清除bug，那就太好了。

MRUnit 框架

MRUnit是Cloudera公司专为Hadoop MapReduce写的单元测试框架，API非常简洁实用。MRUnit针对不同测试对象使用不同的Driver：

MapDriver：针对单独的Map测试

ReduceDriver：针对单独的Reduce测试

MapReduceDriver：将map和reduce串起来测试

PipelineMapReduceDriver：将多个MapReduce对串志来测试

接下来，小讲就以Temperature程序作为测试案例，说明如何使用MRUnit框架？

准备测试案例

为了理解单元测试框架，我们准备了一个 MapReduce程序，这里还是以 Temperature 作为测试案例进行演示，只不过 map 方法中的气象站id（key）是从读入文件名称中提取的，为了便于单元测试，这里我们将 key 设置为常量03103，Temperature 具体代码如下所示。

package com.dajiangtai.hadoop.test;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* 统计美国每个气象站30年来的平均气温
* 1、编写map()函数
* 2、编写reduce()函数
* 3、编写run()执行方法，负责运行MapReduce作业
* 4、在main()方法中运行程序
*
* @author zhouls
*
*/
//继承Configured类，实现Tool接口
public class Temperature extends Configured implements Tool
{
　　public static class TemperatureMapper extends Mapper<LongWritable, Text, Text, IntWritable>
　　{　　　　　　　　　　　　　　　　　　　　　　　　　　 //输入的key,输入的value,输出的key,输出的value
　　　　　　/**
　　　　　　* @function Mapper 解析气象站数据
　　　　　　* @input key=偏移量 value=气象站数据
　　　　　　* @output key=weatherStationId value=temperature
　　　　　　*/
　　　　　　public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
　　　　　　{ 　　　　　　　　　　　　　　　　　　　　　　 //map()函数还提供了context实例，用于键值对的输出
　　　　　　　　　//第一步，我们将每行气象站数据转换为每行的String类型
　　　　　　　　 String line = value.toString(); //每行气象数据

　　　　　　 //第二步：提取气温值
　　　　　　　　 int temperature = Integer.parseInt(line.substring(14, 19).trim());//每小时气温值
　　　　　　　　　　　　　　　　　　　　　　　　　　　　//需要转换为整形，截取第14位到19位，把中间的空格去掉。
　　　　　　　　 if (temperature != -9999) //过滤无效数据
　　　　　　　　{
　　　　　　　　　　　　//第三步：提取气象站编号
　　　　　　　　　　　　//获取输入分片
　　　　　　　　　　　　FileSplit fileSplit = (FileSplit) context.getInputSplit();//提取输入分片，并转换类型
　　　　　　　　　　　　//然后通过文件名称提取气象站编号
　　　　　　　　　　　　String weatherStationId = fileSplit.getPath().getName().substring(5, 10);//通过文件名称提取气象站id
　　　　　　　　　　　　　　　　　　　　　　//首先通过文件分片fileSplit来获取文件路径，然后再获取文件名字，然后截取第5位到第10位就可以得到气象站编号
　　　　　　　　　　　　context.write(new Text(weatherStationId), new IntWritable(temperature));
　　　　　　　　　　　　　　　　　　　　 //气象站编号，气温值
　　　　　　　　}
　　　　　　}
　　　}

public static class TemperatureReducer extends Reducer< Text, IntWritable, Text, IntWritable>
{
　　private IntWritable result = new IntWritable();
　　　　　　　　　　　　//因为气温是IntWritable类型
　　public void reduce(Text key, Iterable< IntWritable> values,Context context) throws IOException, InterruptedException
　　{ 　　　　　　　　　　　　　　 //reduce输出的key,key的集合，context的实例
　　　　　//第一步：统计相同气象站的所有气温
　　　　　int sum = 0;
　　　　　int count = 0;
　　　　for (IntWritable val : values) //for循环来循环同一个气象站的所有气温值
　　　　{//对所有气温值累加
　　　　　　　sum += val.get();
　　　　　　count++;
　　　　　}
　　　　 result.set(sum / count);
　　　　 context.write(key, result);
　　}
}

public int run(String[] args) throws Exception
{
　　 // TODO Auto-generated method stub
　　 //第一步：读取配置文件
　　Configuration conf = new Configuration();//读取配置文件

　　//第二步:输出路径存在就先删除
　　Path mypath = new Path(args[1]);//定义输出路径的Path对象，mypath
　　FileSystem hdfs = mypath.getFileSystem(conf);//通过路径下的getFileSystem来获得文件系统
　　if (hdfs.isDirectory(mypath))//如果输出路径存在
　　{
　　　　hdfs.delete(mypath, true);//则就删除
　　}
　　//第三步：构建job对象
　　Job job = new Job(conf, "temperature");//新建一个任务,job名字是tempreature
　　job.setJarByClass(Temperature.class);// 设置主类
　　//通过job对象来设置主类Temperature.class

　　//第四步：指定数据的输入路径和输出路径
　　FileInputFormat.addInputPath(job, new Path(args[0]));// 输入路径,args[0]
　　FileOutputFormat.setOutputPath(job, new Path(args[1]));// 输出路径,args[1]

　　//第五步：指定Mapper和Reducer
　　job.setMapperClass(TemperatureMapper.class);// Mapper
　　job.setReducerClass(TemperatureReducer.class);// Reducer

　　//第六步：设置map函数和reducer函数的输出类型
　　job.setOutputKeyClass(Text.class);
　　job.setOutputValueClass(IntWritable.class);

　　//第七步：提交作业
　　return job.waitForCompletion(true)?0:1;//提交任务
　　}

　　/**
　　* @function main 方法
　　* @param args
　　* @throws Exception
　　*/
　　public static void main(String[] args) throws Exception
　　{
　　　　//第一步
　　　　String[] args0 =
　　　　{
　　　　　　"hdfs://djt002:9000/weather/",
　　　　　　"hdfs://djt002:9000/weather/out"
　　　　};
　　　　//第二步
　　　　int ec = ToolRunner.run(new Configuration(), new Temperature(), args0);
　　　　//第一个参数是读取配置文件，第二个参数是主类Temperature，第三个参数是输如路径和输出路径的属组
　　　　System.exit(ec);
　　}

}

1、打开MyEclipse，以美国气象站为例。在Temperature.java里，

技术分享

改写为

技术分享

目的是 key 设置为常量03103

因为这个气象站编号需要从文件中截取，不便于测试，所以说，我们将其设置为常量。

2、在D:\Software\hadoop-2.2.0\下创建extralib文件夹，将下载好的mrunit-hadoop.jar

放在这个目录下。

技术分享

3、首先需要导入mrunit包，即mrunit-hadoop.jar包

Hadoop -> Bulid Path -> Configure Bulid Path

4、 Java Bulid Path -> Libraries -> Add External JARs … ...

技术分享

5、 Map 单元测试

在com.dajiangtai.hadoop.test包下，创建TemperatureMapperTest.java，来对Map进行测试。

技术分享

6、编写TemperatureMapperTest.java的代码。编译，出现以下，则说明无误。

技术分享

在test()方法中，withInput的key/value参数分别为偏移量和一行气象数据，其类型要与TemperatureMapper的输入类型一致即为LongWritable和Text。 withOutput的key/value参数分别是我们期望输出的new Text("03103")和new IntWritable(200)，我们要达到的测试效果就是我们的期望输出结果与 TemperatureMapper 的实际输出结果一致。

测试方法为 test() 方法，左边的对话框里显示"Runs：1/1，Errors：0，Failures：0"，说明 Mapper 测试成功了。

7 Reducer 单元测试

在com.dajiangtai.hadoop.test包下，创建TemperatureReduceTest.java，来对Reduce进行测试。

在test()方法中，withInput的key/value参数分别为new Text(key)和List类型的集合values。withOutput 的key/value参数分别是我们所期望输出的new Text(key)和new IntWritable(150)，我们要达到的测试效果就是我们的期望输出结果与TemperatureReducer实际输出结果一致。

8 编写TemperatureReduceTest.java的代码。编译，出现以下，则说明无误。

Reducer 端的单元测试，鼠标放在 TemperatureReduceTest 类上右击，选择 Run As ——> JUnit test，运行结果如下所示。

技术分享

测试方法为 test() 方法，左边的对话框里显示"Runs：1/1，Errors：0，Failures：0"，说明 Reducer 测试成功了。

9 MapReduce 单元测试

把 Mapper 和 Reducer 集成起来的测试案例代码如下。

在com.dajiangtai.hadoop.test包下，创建TemperatureTest.java，来进行测试。

在 test() 方法中，withInput添加了两行测试数据line和line2，withOutput 的key/value参数分别为我们期望的输出结果new Text("03103")和new IntWritable(150)。我们要达到的测试效果就是我们期望的输出结果与Temperature实际的输出结果一致。

技术分享

10 编写TemperatureTest.java的代码。编译，出现以下，则说明无误。

Reducer 端的单元测试，鼠标放在 TemperatureTest.java类上右击，选择 Run As ——> JUnit test，运行结果如下所示。

技术分享

测试方法为 test() 方法，左边的对话框里显示"Runs：1/1，Errors：0，Failures：0"，说明 MapReduce 测试成功了。

如何进行单元测试

标签：

原文地址：http://www.cnblogs.com/zlslch/p/5078016.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行