Spark 2.x 读取 HBase 1.x/2.x 表数据

时间：2020-04-22 09:16:07 阅读：55 评论：0 收藏：0 [点我收藏+]

标签：col main 表数据表数 mapr park spark key mapreduce

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Reads the HBase table `student` through Spark, prints the number of rows
  * and then prints the `info:name`, `info:gender` and `info:age` cells of
  * every row together with its row key.
  */
object SparkOperateHBase {

  /**
    * Program entry point.
    *
    * @param args command-line arguments; not used by this program
    */
  def main(args: Array[String]): Unit = {

    val conf = HBaseConfiguration.create()
    val sc = new SparkContext(new SparkConf())

    try {
      conf.set(TableInputFormat.INPUT_TABLE, "student")

      val stuRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
        classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
        classOf[org.apache.hadoop.hbase.client.Result])

      // Encode the column coordinates once, with HBase's own helper, instead of
      // String.getBytes, whose result depends on the platform default charset.
      val infoFamily = Bytes.toBytes("info")
      val nameQualifier = Bytes.toBytes("name")
      val genderQualifier = Bytes.toBytes("gender")
      val ageQualifier = Bytes.toBytes("age")

      // Convert every HBase row into a tuple of plain strings BEFORE caching.
      // The Spark API documentation for newAPIHadoopRDD advises copying Hadoop
      // records with a map function rather than caching the returned RDD directly.
      val resRDD = stuRDD.map { case (_, result) =>
        // Bytes.toString yields null when the cell is absent, so a missing
        // column is still printed as "null", exactly as before.
        def cell(qualifier: Array[Byte]): String =
          Bytes.toString(result.getValue(infoFamily, qualifier))

        (Bytes.toString(result.getRow), cell(nameQualifier), cell(genderQualifier), cell(ageQualifier))
      }

      // Two actions follow (count and foreach); caching avoids a second table scan.
      resRDD.cache()

      val count = resRDD.count()
      println("Students RDDCount: " + count)

      // Print every row. The println runs inside the Spark tasks, not in this method.
      resRDD.foreach { case (key, name, gender, age) =>
        println("Row key:" + key + " Name: " + name + " Gender: " + gender + " Age: " + age)
      }
    } finally {
      // Always release the Spark context, even when the job fails.
      sc.stop()
    }

  }

}

Spark 2.x 读取 HBase 1.x/2.x 表数据

标签：col main 表数据表数 mapr park spark key mapreduce

原文地址：https://www.cnblogs.com/zxbdboke/p/12749533.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)

周排行