码迷,mamicode.com
首页 > 其他好文 > 详细

数据筛选

时间:2015-12-10 21:25:27      阅读:128      评论:0      收藏:0      [点我收藏+]

标签:

数据准备

route_log

Apr 23 11:49:54 hostapd: wlan0: STA 14:7d:c5:9e:fb:84

Apr 23 11:49:52 hostapd: wlan0: STA 74:e5:0b:04:28:f2

Apr 23 11:49:50 hostapd: wlan0: STA cc:af:78:cc:d5:5d

Apr 23 11:49:44 hostapd: wlan0: STA cc:af:78:cc:d5:5d

Apr 23 11:49:43 hostapd: wlan0: STA 14:7d:c5:9e:fb:84

Apr 23 11:49:42 hostapd: wlan0: STA 74:e5:0b:04:28:f2

将 route_log 上传到 HDFS 上,然后从 route_log 的每一行中筛选出三个字段:month、day、mac。

代码编写

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver for the "route_filter" MapReduce job.
 *
 * <p>Configures a job that runs {@link RouteMap} over a log file and writes
 * {@code Text} keys with {@code NullWritable} values. No reducer class is
 * configured here.
 *
 * <p>Usage: {@code Route_filter [inputPath [outputPath]]}. When a path is
 * omitted the corresponding default below is used.
 */
public class Route_filter extends Configured implements Tool {

    /** Input location used when no input path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "/value/route_log";

    /** Output location used when no output path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "/outvalue/outroute_log";

    /**
     * Builds the job, runs it, and blocks until it has finished.
     *
     * @param args optional positional arguments: {@code args[0]} overrides the
     *             input path, {@code args[1]} overrides the output path;
     *             may be {@code null} or empty
     * @return {@code 0} if the job completed successfully, {@code 1} otherwise
     * @throws Exception if the job cannot be configured, submitted or monitored
     */
    @Override
    public int run(String[] args) throws Exception {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = getConf();
        // Job.getInstance is the replacement for the deprecated Job constructors.
        Job job = Job.getInstance(conf, "route_filter");
        job.setJarByClass(Route_filter.class);
        job.setMapperClass(RouteMap.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // submit() returns as soon as the job is handed to the cluster, so asking
        // isSuccessful() right afterwards does not reflect the real outcome.
        // waitForCompletion blocks until the job ends and reports its success.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; the process exit code is the value returned by
     * {@link #run(String[])}.
     *
     * @param args command-line arguments, forwarded to {@link ToolRunner} so that
     *             generic Hadoop options are parsed and the rest reach {@code run}
     * @throws Exception if the tool fails with an exception
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new Route_filter(), args);
        System.exit(exitCode);
    }
}

class RouteMap extends Mapper<LongWritable, Text, Text, NullWritable> {
private Text result = new Text();

protected void map(LongWritable key, Text value, Context context)
throws java.io.IOException, InterruptedException {
String lineValue = value.toString();
String[] lineSplit = lineValue.split(" ");
String month = lineSplit[0];
String day = lineSplit[1];
String mac = lineSplit[6];
result.set(month + " " + day + " " + mac);
context.write(result, NullWritable.get());
}
}

  

最终输出结果:

Apr 23 14:7d:c5:9e:fb:84
Apr 23 14:7d:c5:9e:fb:84
Apr 23 74:e5:0b:04:28:f2
Apr 23 74:e5:0b:04:28:f2
Apr 23 cc:af:78:cc:d5:5d
Apr 23 cc:af:78:cc:d5:5d

数据筛选

标签:

原文地址:http://www.cnblogs.com/LgyBean/p/5037081.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!