package com.latrobe.spark

import org.apache.spark.{SparkContext, SparkConf}

/**
 * Created by spark on 15-1-18.
 *
 * Small demo of `flatMapValues` on a pair RDD: every value is mapped to a
 * sequence, and each element of that sequence is emitted under the original key.
 */
object FlatMapValues {

  /**
   * Runs the demo on a local Spark master and prints the resulting pairs.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("spark-demo").setMaster("local")
    val sc = new SparkContext(conf)
    // flatMapValues is defined on PairRDDFunctions, so the implicit
    // conversion from an RDD of pairs has to be in scope before it is used.
    import org.apache.spark.SparkContext._

    try {
      val a = sc.parallelize(List("dog", "tiger", "lion", "cat", "panther", "eagle"), 2)

      // b looks like: (3,dog)(5,tiger)(4,lion)(3,cat)(7,panther)(5,eagle)
      val b = a.map(x => (x.length, x))

      /*
       * 1. Apply "x" + _ + "x" to every value of b, e.g. dog => xdogx.
       * 2. Flatten the value produced in step 1 while keeping the key, e.g.
       *    (3,xdogx) => (3,x),(3,d),(3,o),(3,g),(3,x)
       */
      val c = b.flatMapValues("x" + _ + "x")

      // Pairs are printed back to back, without separators or newlines.
      c.collect().foreach(print)
    } finally {
      // Always release the context, even if the job above throws.
      sc.stop()
    }
  }
}
Spark PairRDDFunctions flatMapValues
原文地址:http://blog.csdn.net/hi_1234567/article/details/42845683