spark1.4 操作hbase 基于rdd
2016-06-24 11:07
387 查看
import java.io.FileInputStream
import java.util.Properties

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext

/**
 * Reads the latest cell of column `cf:col` from an HBase table through
 * Spark's `newAPIHadoopRDD` and prints each value as a UTF-8 string.
 *
 * Configuration comes from a properties file (path given as `args(0)`,
 * default `"hbaseConfig.properties"`) with keys:
 *   - hbase.spark.master    Spark master URL
 *   - hbase.spark.job.name  Spark application name
 *   - hbase.rootdir         HBase root directory
 *   - hbase.table.name      table to scan
 */
object readDataFromHbase {

  def main(args: Array[String]): Unit = {
    val propFileName = if (args.nonEmpty) args(0) else "hbaseConfig.properties"

    // Load job/connection settings; close the stream even if load() throws.
    val prop = new Properties
    val inStream = new FileInputStream(propFileName)
    try prop.load(inStream) finally inStream.close()

    val sparkMaster  = prop.getProperty("hbase.spark.master")
    val sparkJobName = prop.getProperty("hbase.spark.job.name")
    val sc = new SparkContext(sparkMaster, sparkJobName)

    try {
      // HBase connection: root dir plus the table this job scans.
      val hbaseConf = HBaseConfiguration.create()
      hbaseConf.set("hbase.rootdir", prop.getProperty("hbase.rootdir"))
      hbaseConf.set(TableInputFormat.INPUT_TABLE, prop.getProperty("hbase.table.name"))

      val hBaseRDD = sc.newAPIHadoopRDD(
        hbaseConf,
        classOf[TableInputFormat],
        classOf[ImmutableBytesWritable],
        classOf[Result]
      )

      val family    = Bytes.toBytes("cf")
      val qualifier = Bytes.toBytes("col")

      val hBaseData = hBaseRDD
        .map(_._2) // keep only the Result; the row key is not used
        .map(_.getColumnLatestCell(family, qualifier))
        // Rows that lack the cf:col column yield null — drop them instead of
        // hitting an NPE in the decoding step below.
        .filter(_ != null)
        // BUG FIX: Cell.getValueArray() returns the cell's entire backing
        // byte array (row key, family, qualifier, value, ...), not just the
        // value. CellUtil.cloneValue copies only the value bytes.
        .map(cell => new String(CellUtil.cloneValue(cell), "UTF-8"))

      hBaseData.foreach(println)
    } finally {
      sc.stop() // release Spark resources even if the job fails
    }
  }
}
<!-- HBase (duplicate hbase-hadoop-compat and hbase-server entries removed) -->
<dependency><groupId>org.apache.hbase</groupId><artifactId>hbase</artifactId><version>${hbase.version}</version></dependency>
<dependency><groupId>org.apache.hbase</groupId><artifactId>hbase-client</artifactId><version>${hbase.version}</version></dependency>
<dependency><groupId>org.apache.hbase</groupId><artifactId>hbase-server</artifactId><version>${hbase.version}</version></dependency>
<dependency><groupId>org.apache.hbase</groupId><artifactId>hbase-common</artifactId><version>${hbase.version}</version></dependency>
<dependency><groupId>org.apache.hbase</groupId><artifactId>hbase-hadoop2-compat</artifactId><version>${hbase.version}</version></dependency>
<dependency><groupId>org.apache.hbase</groupId><artifactId>hbase-hadoop-compat</artifactId><version>${hbase.version}</version></dependency>
<dependency><groupId>org.apache.hbase</groupId><artifactId>hbase-protocol</artifactId><version>${hbase.version}</version></dependency>
相关文章推荐
- #pragma data_seg 跨程序通信 单件模式
- 关于Cassandra一些调优配置
- 排序方法----选择排序
- 聊聊 tcpdump 与 Wireshark 抓包分析
- java中字母
- Nginx 模块自主开发四: 模块数据结构
- js ——算法
- 获取https证书
- err-disabled的查看与恢复
- 【Unity】UGUI如何判断鼠标或者手指是否点击到UI上
- Android 获取屏幕的分辨率
- (EM算法)The EM Algorithm(1)
- PopupWindow和Dialog
- 网络通信——socket(TCP/IP).Http,同步和异步的区别
- mysql忘记密码怎么办
- 玩转html5 canvas
- 欢迎使用CSDN-markdown编辑器
- JS中的call、apply、bind方法
- Eclipse 代码修改后仍然是之前的运行结果解决办法
- 重新安装Visual Studio后免重装Intel Parallel Studio