RDD Transformation Ops
2016-01-23 16:09
239 查看
package dt.spark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * Small runnable tour of common RDD transformations.
 *
 * Each `*Transformation` method builds a tiny in-memory RDD with
 * `SparkContext.parallelize`, applies one transformation, collects the
 * result to the driver and prints it. All methods share the single
 * `SparkContext` created in [[main]]; none of them stops it.
 */
object Transformation {

  /**
   * Entry point: creates a local `SparkContext`, runs every demo in turn and
   * always stops the context afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("hehadf").setMaster("local")
    val sc = new SparkContext(conf)
    // The context is shared by all demos, so it is stopped exactly once here
    // (even if a demo throws) rather than inside the individual methods.
    try {
      mapTransformation(sc)
      filterTransformation(sc)
      flatMapTransformation(sc)
      joinTransformation(sc)
      groupByKeyTransformation(sc)
      cogroupTransformation(sc)
    } finally {
      sc.stop()
    }
  }

  /**
   * `map`: doubles every number in 1..10 and prints the results.
   *
   * @param sc the shared Spark context
   */
  def mapTransformation(sc: SparkContext): Unit = {
    val nums = sc.parallelize(1 to 10)
    val doubled = nums.map(_ * 2)
    doubled.collect().foreach(println)
  }

  /**
   * `filter`: keeps the even numbers in 1..10 and prints them.
   *
   * @param sc the shared Spark context
   */
  def filterTransformation(sc: SparkContext): Unit = {
    val nums = sc.parallelize(1 to 10)
    // The predicate passed to filter must return a Boolean.
    val evens = nums.filter(_ % 2 == 0)
    evens.collect().foreach(println)
  }

  /**
   * `flatMap`: splits each line on a single space and prints every word.
   *
   * One way to think about `flatMap`: first `map`, then flatten, i.e. the
   * per-element results of the map are merged into one collection.
   *
   * @param sc the shared Spark context
   */
  def flatMapTransformation(sc: SparkContext): Unit = {
    // Signature of parallelize, for reference:
    //   def parallelize[T: ClassTag](
    //       seq: Seq[T],
    //       numSlices: Int = defaultParallelism): RDD[T]
    val lines = Array("jj sf", "haha f er ", "yyzz sdf d")
    // parallelize() must be given a collection.
    val linesRdd = sc.parallelize(lines)
    val words = linesRdd.flatMap(_.split(" "))
    words.collect().foreach(println)
  }

  /**
   * `join`: joins two pair RDDs on their keys and prints each key followed by
   * the pair of matched values.
   *
   * @param sc the shared Spark context
   */
  def joinTransformation(sc: SparkContext): Unit = {
    val left = sc.parallelize(Array((1, "xx"), (2, "yy"), (3, "zz")))
    val right = sc.parallelize(Array((1, "aa"), (2, "bb"), (3, "cc")))
    left.join(right).collect().foreach { case (key, values) =>
      println(s"$key $values")
    }
  }

  /**
   * `groupByKey`: groups the values of a pair RDD by key and prints each
   * `(key, values)` entry.
   *
   * @param sc the shared Spark context
   */
  def groupByKeyTransformation(sc: SparkContext): Unit = {
    val pairs = sc.parallelize(Array((1, 2), (2, 3), (3, 4), (1, 4)))
    pairs.groupByKey().collect().foreach(println)
  }

  /**
   * `cogroup`: groups two pair RDDs by key at once and prints, for every key
   * present in either RDD, the key followed by the two value groups.
   *
   * @param sc the shared Spark context
   */
  def cogroupTransformation(sc: SparkContext): Unit = {
    // The sample values deliberately mix Ints, a tuple and a String; the
    // element type is annotated so the widening to Any is explicit instead of
    // an accident of type inference.
    val leftData: Array[(Int, Any)] = Array((1, (1, 2)), (2, 3), (3, 4), (1, 4))
    val rightData: Array[(Int, Any)] = Array((1, 9), (2, 7), (3, " "), (1, 9), (8, 9))
    val left = sc.parallelize(leftData)
    val right = sc.parallelize(rightData)
    left.cogroup(right).collect().foreach { case (key, groups) =>
      println(s"$key $groups")
    }
  }
}
filterTransformation(sc) flatMapTransformation(sc) joinTransformation(sc) cogroupTransformation(sc) } def mapTransformation(sc:SparkContext){ val nums=sc.parallelize(1 to 10) val mapped=nums.map(item=>item*2) mapped.collect().foreach(println) //sc.stop } def
filterTransformation(sc:SparkContext){ val nums=sc.parallelize(1 to 10) val filtered=nums.filter { item =>item%2==0 }//need a boolean filtered.collect().foreach(println) //sc.stop } def flatMapTransformation(sc:SparkContext){// def parallelize[T: ClassTag](
//这是parallelize的定义// seq: Seq[T],// numSlices: Int = defaultParallelism): RDD[T] //可以这样理解先map后flat flat的意思就是把map的结果放在一个集合中比如(Array) val data=Array("jj sf","haha f er ","yyzz sdf d") val data1=sc.parallelize(data)//parallelize()中要传入什么东西,要传入一个集合 val word=data1.flatMap(line=>line.split("
")) word.collect().foreach { x=>println(x) } } def joinTransformation(sc:SparkContext){ val data=Array(Tuple2(1,"xx"),Tuple2(2,"yy"),Tuple2(3,"zz")) val data1=Array(Tuple2(1,"aa"),Tuple2(2,"bb"),Tuple2(3,"cc")) val data2=sc.parallelize(data) val data3=sc.parallelize(data1)
val data4=data2.join(data3) data4.collect().foreach(pair=>println(pair._1+" "+pair._2)) } def groupByKeyTransformation(sc:SparkContext){ val data=Array(Tuple2(1,2),Tuple2(2,3),Tuple2(3,4),Tuple2(1,4)) val data1=sc.parallelize(data) val data2=data1.groupByKey()
data2.collect().foreach(println) } def cogroupTransformation(sc:SparkContext){ val data=Array(Tuple2(1,Tuple2(1,2)),Tuple2(2,3),Tuple2(3,4),Tuple2(1,4)) val data1=Array(Tuple2(1,9),Tuple2(2,7),Tuple2(3," "),Tuple2(1,9),Tuple2(8,9)) val data2=sc.parallelize(data)
val data3=sc.parallelize(data1) val data4=data2.cogroup(data3) data4.collect.foreach(pair=>println(pair._1+" "+pair._2)) }}
相关文章推荐
- 建网站需要学习的软件有哪些
- centos桌面的安装和卸载
- eclipse 创建 javaWeb 项目 如何 配置 tomcat
- 使用Jenkins配置自动化构建
- tomcat的下载、配置、启动和关闭
- Hadoop FS Shell Command
- Linux环境软件安装流程
- 格式化输出命令printf
- linux习惯每天一个命令之mv
- linux上使用netstat查看当前服务和监听端口
- 用VMware在本机创建多个Linux(Ubuntu)并相互通信,搭建服务器集群
- linux时区的几个代码片段
- centos7之docker使用systemd
- 权重6网站SEO诊断,如何寻找网站流量的突破口
- Theano(1):windows、linux下安装深度学习库Theano
- 运维安全概述
- linux常用命令
- OC中的OOP之 --- 基础语法
- CentOS6.5升级内核到3.10.28 --已验证
- linux学习笔记----10