
Spark action operators

Posted: 2019-04-02 12:21:49


package action;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.VoidFunction;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Examples of common Spark actions: reduce, collect, count, first, take,
 * takeSample, saveAsTextFile, countByKey and foreach, using the Java API.
 *
 * @ClassName: actions
 * @author: DingH
 * @since: 2019/4/2 10:53
 */
public class actions {
    public static void main(String[] args) {
        // Run locally for the demo.
        SparkConf conf = new SparkConf().setAppName("actions").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // A plain RDD of integers (not used by the pair-RDD actions below).
        JavaRDD<Integer> parallelize = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

        // A pair RDD with duplicate keys, used by the actions below.
        JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(Arrays.asList(
                new Tuple2<String, Integer>("aaaa", 111),
                new Tuple2<String, Integer>("aaaa", 111),
                new Tuple2<String, Integer>("bbbb", 222),
                new Tuple2<String, Integer>("bbbb", 222),
                new Tuple2<String, Integer>("bbbb", 222),
                new Tuple2<String, Integer>("ccc", 333)
        ));

        // reduceByKey (a transformation): sum the values for each key.
        JavaPairRDD<String, Integer> rdd1 = rdd.reduceByKey(new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer integer, Integer integer2) throws Exception {
                return integer + integer2;
            }
        });

        // reduce (an action): fold all pairs into a single tuple by
        // concatenating the keys and summing the values.
        Tuple2<String, Integer> reduce = rdd1.reduce(new Function2<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>>() {
            public Tuple2<String, Integer> call(Tuple2<String, Integer> t1, Tuple2<String, Integer> t2) throws Exception {
                return new Tuple2<String, Integer>(t1._1 + t2._1, t1._2 + t2._2);
            }
        });

        System.out.println(reduce);

        // collect: bring the whole RDD back to the driver as a List.
        List<Tuple2<String, Integer>> collect = rdd1.collect();
        for (Tuple2<String,Integer> tt:collect){
            System.out.println(tt);
        }

        // count: number of elements in the RDD.
        long count = rdd1.count();

        // first: the first element; take(n): the first n elements.
        Tuple2<String, Integer> first = rdd1.first();

        List<Tuple2<String, Integer>> take = rdd1.take(4);

        // takeSample: a random sample of 3 elements, without replacement.
        List<Tuple2<String, Integer>> tuple2s = rdd1.takeSample(false, 3);

        // saveAsTextFile: write the RDD as text files; supply a real output path before running.
        rdd1.saveAsTextFile("");

        // countByKey: number of elements per key, returned to the driver as a Map.
        Map<String, Object> stringObjectMap = rdd1.countByKey();

        // foreach: run the given function on the executors for each element.
        rdd1.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            public void call(Tuple2<String, Integer> stringIntegerTuple2) throws Exception {
                System.out.println(stringIntegerTuple2);
            }
        });

        sc.stop();
    }
}
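
For reference, the same actions can be written more compactly with Java 8 lambda expressions. The sketch below is not from the original post: the class name ActionsLambda and the "local" master are placeholder choices, and it assumes Spark's Java API on Java 8 or later; otherwise it performs the same reduceByKey / reduce / collect / count steps as the code above.

package action;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

public class ActionsLambda {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("actions-lambda").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>("aaaa", 111), new Tuple2<>("aaaa", 111),
                new Tuple2<>("bbbb", 222), new Tuple2<>("bbbb", 222),
                new Tuple2<>("bbbb", 222), new Tuple2<>("ccc", 333)
        ));

        // Transformation: sum the values for each key.
        JavaPairRDD<String, Integer> sums = rdd.reduceByKey((a, b) -> a + b);

        // Action: fold everything into one tuple (keys concatenated, values summed).
        Tuple2<String, Integer> total = sums.reduce(
                (t1, t2) -> new Tuple2<>(t1._1 + t2._1, t1._2 + t2._2));
        System.out.println(total);

        // Actions that return results to the driver.
        List<Tuple2<String, Integer>> all = sums.collect();
        all.forEach(System.out::println);
        System.out.println(sums.count());

        sc.stop();
    }
}

Running either version locally, the reduce typically prints something like (aaaabbbbccc,1221): the per-key sums are aaaa=222, bbbb=666 and ccc=333, and 222 + 666 + 333 = 1221. The order in which the keys are concatenated is not guaranteed once the data is spread across multiple partitions.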


Original article: https://www.cnblogs.com/dhName/p/10641618.html
