标签:
import org.apache.spark._ import SparkContext._ import org.apache.spark.SparkConf import java.util.Date import java.text.SimpleDateFormat import org.apache.hadoop.io.Text import org.apache.hadoop.mapred.TextOutputFormat import org.apache.spark.Partitioner object partitioner { def main(args: Array[String]): Unit = { val time = new SimpleDateFormat("MMddHHmm").format(new Date()); val sparkConf = new SparkConf().setAppName("wordcount_"+time) sparkConf.set("mapreduce.framework.name", "yarn"); val sc =new SparkContext(sparkConf) val textFile = sc.textFile( "hdfs://namenode:9000/data/mapreduce/chuping/test_in_1/new5", 1).cache() val result = textFile.flatMap (line => line.split("\t") ). map (word => (word,1)).reduceByKey(new testPartitioner, _+_) result.saveAsTextFile("hdfs://namenode:9000/data/zk/test/partitioner"+time) sc.stop() } } class testPartitioner extends Partitioner{ val numPartitions = 3 def getPartition(key: Any)=1 指定到第几个reduce }
这里的程序只是一个测试的程序,使用的也是一个count而已,无法体现partitioner的实际作用,但是在实际生产中,partitioner的运用比比皆是
标签:
原文地址:http://my.oschina.net/u/2010330/blog/493942