标签:port 一个 去重 编写 ips cal main col line
今天我完成了实验四的前两个实验。但是!!!在跟eclipse的scala插件做斗争的时候,把hadoop的插件给搞没了,按照林子雨老师的教程重新走了一遍还是不行,不知不觉又搞了一下午,还是没解决。明天把实验四完成,再做一部分实验五,希望明天可以把eclipse的问题解决。
// spark-shell snippet: count the distinct values found in the first
// comma-separated column of Data01.txt.
// NOTE(review): the first column is presumably the student name -- confirm
// against the data file.
val lines = sc.textFile("file:///usr/local/hadoop/Data01.txt")
// Keep only the first column of every row.
val par = lines.map(row => row.split(",")(0))
// Remove duplicates.
val distinct_par = par.distinct()
// Total number of distinct values. This action must sit on its own line:
// placed after a `//` comment on the same line it is never executed.
distinct_par.count()
// spark-shell snippet: count how many different values occur in the second
// comma-separated column of Data01.txt.
// NOTE(review): the second column is presumably the course name -- confirm
// against the data file.
val records = sc.textFile("file:///usr/local/hadoop/Data01.txt")
val secondColumn = records.map(_.split(",")(1))
val uniqueSecondColumn = secondColumn.distinct()
uniqueSecondColumn.count()
// spark-shell snippet: print every record whose first column is "Tom", then
// compute the mean of the third column (parsed as Int) over those records.
val lines = sc.textFile("file:///usr/local/hadoop/Data01.txt")
val pare = lines.filter(row => row.split(",")(0) == "Tom")
pare.foreach(println)
// Split each row once and keep (first column, third column as Int).
val tomScores = pare.map { row =>
  val cols = row.split(",")
  (cols(0), cols(2).toInt)
}
// Accumulate (sum of scores, number of scores) per key.
val sumAndCount = tomScores.mapValues(score => (score, 1)).reduceByKey { (a, b) =>
  (a._1 + b._1, a._2 + b._2)
}
// Divide as Double: Int / Int would silently drop the fractional part of the mean.
sumAndCount.mapValues { case (sum, n) => sum.toDouble / n }.collect()
// The result type is Array[(String, Double)]. For reference, the earlier
// integer-division version printed: res9: Array[(String, Int)] = Array((Tom,30))
// spark-shell snippet: for every distinct value of the first comma-separated
// column, print how many records carry that value, as (key, count) pairs.
val lines = sc.textFile("file:///usr/local/hadoop/Data01.txt")
// Only the key is needed for counting, so pair it directly with 1 instead of
// carrying the second column along and discarding it later.
val pare = lines.map(row => (row.split(",")(0), 1))
// Sum the ones per key; the output order of foreach(println) is not defined.
pare.reduceByKey(_ + _).foreach(println)
// spark-shell snippet: count the records whose second comma-separated column
// equals "DataBase".
val records = sc.textFile("file:///usr/local/spark/sparksqldata/Data01.txt")
val dataBaseRecords = records.filter { row =>
  val cols = row.split(",")
  cols(1) == "DataBase"
}
dataBaseRecords.count()
// Output recorded in the original REPL session: res1: Long = 126
// spark-shell snippet: compute the mean of the third column (parsed as Int)
// for each distinct value of the second comma-separated column.
val lines = sc.textFile("file:///usr/local/spark/sparksqldata/Data01.txt")
// Split each row once and keep (second column, third column as Int).
val pare = lines.map { row =>
  val cols = row.split(",")
  (cols(1), cols(2).toInt)
}
// Accumulate (sum of scores, number of scores) per key.
val sumAndCount = pare.mapValues(score => (score, 1)).reduceByKey { (a, b) =>
  (a._1 + b._1, a._2 + b._2)
}
// Divide as Double: Int / Int would silently drop the fractional part of the mean.
sumAndCount.mapValues { case (sum, n) => sum.toDouble / n }.collect()
// The result type is Array[(String, Double)]. For reference, the earlier
// integer-division version printed:
//   res0: Array[(String, Int)] = Array((Python,57), (OperatingSystem,54),
//     (CLanguage,50), (Software,50), (Algorithm,48), (DataStructure,47),
//     (DataBase,50), (ComputerNetwork,51))
// spark-shell snippet: count the records whose second comma-separated column
// equals "DataBase", using a long accumulator instead of the count() action.
val records = sc.textFile("file:///usr/local/spark/sparksqldata/Data01.txt")
// One `1` per matching record.
val dataBaseOnes = records
  .map(_.split(",")(1))
  .filter(_ == "DataBase")
  .map(_ => 1)
val accum = sc.longAccumulator("My Accumulator")
// Each task adds its ones to the accumulator; the total is read back below.
dataBaseOnes.foreach(one => accum.add(one))
accum.value
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.HashPartitioner

/** Standalone Spark job that removes duplicate lines from a text input.
  *
  * Reads `file:///home/charles/data`, trims every line, drops lines that are
  * empty after trimming, keeps one copy of each distinct line, sorts them and
  * writes them to the `result` directory from a single partition.
  */
object RemDup {

  /** Job entry point.
    *
    * @param args command-line arguments; not used by this job
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("RemDup")
    val sc = new SparkContext(conf)
    try {
      val dataFile = "file:///home/charles/data"
      val data = sc.textFile(dataFile, 2)
      val res = data
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(line => (line, ""))
        // reduceByKey combines duplicates on the map side before the shuffle,
        // unlike partitionBy + groupByKey which ships every duplicate.
        // A single-partition HashPartitioner keeps the output in one part file.
        .reduceByKey(new HashPartitioner(1), (kept, _) => kept)
        .sortByKey()
        .keys
      res.saveAsTextFile("result")
    } finally {
      // Always release the context, even when the job fails.
      sc.stop()
    }
  }
}
标签:port 一个 去重 编写 ips cal main col line
原文地址:https://www.cnblogs.com/quyangzhangsiyuan/p/12257509.html