WordCount Written in Spark
 
Scala version:
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}

object WordCountDemo {
  def main(args: Array[String]): Unit = {
    // set the log level to reduce console noise
    Logger.getLogger("org").setLevel(Level.WARN)
    val conf = new SparkConf().setAppName("WordCountDemo").setMaster("local")
    val sc = new SparkContext(conf)
    sc.textFile("hdfs://hadoop001:9000/in/word")
      .flatMap(_.split(" ")) // split each line into words
      .map((_, 1))           // pair each word with a count of 1
      .reduceByKey(_ + _)    // sum the counts per word
      .collect()             // pull the results back to the driver
      .foreach(println)
    sc.stop()
  }
}
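
The chain reads left to right: flatMap splits every line into words, map pairs each word with a 1, and reduceByKey sums those 1s per word; collect() then pulls the entire result set back to the driver, which is only safe for small outputs. Below is a minimal Scala sketch of a variant that sorts by count and writes the result back to HDFS instead of collecting it; the output path is an assumption (not from the original post) and must not already exist:

import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkConf, SparkContext}

object WordCountToHdfsDemo {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.WARN)
    val conf = new SparkConf().setAppName("WordCountToHdfsDemo").setMaster("local")
    val sc = new SparkContext(conf)
    sc.textFile("hdfs://hadoop001:9000/in/word")
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)                        // most frequent words first
      .saveAsTextFile("hdfs://hadoop001:9000/out/word_count") // hypothetical output path
    sc.stop()
  }
}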
Java version:
import java.util.Arrays;
import java.util.List;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

public class WordCountDemo {
    public static void main(String[] args) {
        // set the log level to reduce console noise
        Logger.getLogger("org").setLevel(Level.WARN);
        // configuration
        SparkConf conf = new SparkConf().setAppName("WordCountDemo").setMaster("local");
        // Spark context
        JavaSparkContext jsc = new JavaSparkContext(conf);
        // create the initial RDD of lines
        JavaRDD<String> linesRDD = jsc.textFile("hdfs://hadoop001:9000/in/word");
        // flatMap: split each line into words
        JavaRDD<String> wordsRDD = linesRDD.flatMap(new FlatMapFunction<String, String>() {
            public Iterable<String> call(String line) throws Exception {
                String[] words = line.split(" ");
                return Arrays.asList(words);
            }
        });
        // mapToPair: pair each word with a count of 1
        JavaPairRDD<String, Integer> tupleRDD = wordsRDD.mapToPair(new PairFunction<String, String, Integer>() {
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<String, Integer>(word, 1);
            }
        });
        // reduceByKey: sum the counts per word
        JavaPairRDD<String, Integer> resultRDD = tupleRDD.reduceByKey(new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer v1, Integer v2) throws Exception {
                return v1 + v2;
            }
        });
        // pull the results back to the driver
        List<Tuple2<String, Integer>> result = resultRDD.collect();
        // print each (word, count) pair
        for (Tuple2<String, Integer> tuple2 : result) {
            System.out.println(tuple2._1 + "=" + tuple2._2);
        }
        jsc.stop();
    }
}
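
Note that the FlatMapFunction above uses the Spark 1.x signature, where call returns an Iterable<String>; from Spark 2.0 on it must return an Iterator<String> instead (return Arrays.asList(words).iterator();). With Java 8 lambdas the three anonymous inner classes collapse to one-liners, e.g. wordsRDD.mapToPair(word -> new Tuple2<>(word, 1)).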

 

Original post: https://www.cnblogs.com/waveblue/p/8461488.html
