码迷,mamicode.com
首页 > 其他好文 > 详细

MapReduce_去重抽取mdn_imsi

时间:2014-05-12 19:55:27      阅读:326      评论:0      收藏:0      [点我收藏+]

标签:style   blog   class   code   java   c   

 

bubuko.com,布布扣
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;


public class MdnWithImsiTest {
    public static class MyMapper extends Mapper<Object, Text, Text, Text> {
        private Text keytText = new Text();
        private Text valuetText = new Text();
//        private static Text line = new Text();
        public void map(Object key, Text value, Context context)throws IOException, InterruptedException {
            // TODO Auto-generated method stub
            
            String[] items = value.toString().split("\\|");
            System.out.println(items.length);
            // context.write(text.set(items[8]), text.set(items[1]));
            keytText.set(items[8]);
            valuetText.set(items[1]);
            context.write(keytText, valuetText);
//            line=value;
//            context.write(line, new Text(""));
        }
    }

    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            Text new_value = new Text();
            for (Text val2 : values) {
                new_value=val2;
             }
            context.write(key, new_value);
            
            
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        System.out.println(otherArgs.length);
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "MdnImsi");
        job.setJobName("MdnImsi");
        job.setJarByClass(MdnWithImsiTest.class);
        job.setMapperClass(MyMapper.class);
        job.setCombinerClass(MyReducer.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
bubuko.com,布布扣

 

MapReduce_去重抽取mdn_imsi,布布扣,bubuko.com

MapReduce_去重抽取mdn_imsi

标签:style   blog   class   code   java   c   

原文地址:http://www.cnblogs.com/Relieved-U/p/3722205.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!