Tags:
1 package keySort; 2 import java.io.IOException; 3 4 import org.apache.hadoop.conf.Configuration; 5 import org.apache.hadoop.io.Text; 6 import org.apache.hadoop.mapreduce.Job; 7 import org.apache.hadoop.mapreduce.Mapper; 8 import org.apache.hadoop.mapreduce.Partitioner; 9 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 10 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 11 import org.apache.hadoop.fs.Path; 12 public class keySort { 13 public static class MyMapper extends Mapper<Object, Text, Text, Text> { 14 private Text map_value=new Text("hello"); 15 public void map(Object key,Text value,Context context) throws IOException, InterruptedException{ 16 context.write(value, map_value); 17 } 18 } 19 20 public static class MyPartitioner extends Partitioner<Text, Text>{ 21 public Text realKey=new Text(); 22 @Override 23 public int getPartition(Text key, Text value, int numredtasks) { 24 // TODO 自动生成的方法存根 25 String[] kk=key.toString().split("\t"); 26 realKey.set(kk[0]); 27 System.out.println("partition key is:"+realKey.toString()); 28 return ((realKey.hashCode()&Integer.MAX_VALUE)%numredtasks); 29 } 30 31 } 32 33 public static void main(String[] args) throws Exception 34 { 35 if(args.length!=2){ 36 System.err.println("Usage: MatrixMultiply <inputPathM> <inputPathN> <outputPath>"); 37 System.exit(2); 38 } 39 Configuration conf=new Configuration(); 40 Job job=new Job(conf,"SeconderySort"); 41 job.setNumReduceTasks(1);//设置为0的话,并不会执行分区过程,也就不能利用符合jian zhi 42 job.setJarByClass(keySort.class); 43 job.setMapperClass(MyMapper.class); 44 job.setPartitionerClass(MyPartitioner.class); 45 job.setOutputKeyClass(Text.class); 46 job.setOutputValueClass(Text.class); 47 FileInputFormat.setInputPaths(job, new Path(args[0])); 48 FileOutputFormat.setOutputPath(job, new Path(args[1])); 49 System.exit(job.waitForCompletion(true)?0:1); 50 51 } 52 53 } 54 55 56 57 58
Tags:
Original article: http://www.cnblogs.com/lz3018/p/4965087.html