Spark中join和cogroup操作
2016-06-04 15:42
399 查看
package com.scala
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
/**
 * Scala example contrasting `join` and `cogroup` on pair RDDs keyed by student id.
 *
 * `join` produces every (name, score) combination for a key, whereas `cogroup`
 * produces a single record per key that groups all names and all scores together.
 * Only the `cogroup` variant is active; the `join` variant is kept as a commented-out example.
 */
object JoinAndCogroup {

  /**
   * Builds a local SparkContext, cogroups the student and score data sets, and prints
   * one section per student id.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("joinAndcogroup").setMaster("local[1]")
    // Obtain the context.
    val sc = new SparkContext(conf)
    try {
      // Sample data: (id, name) and (id, score) pairs.
      val stuList = List((1, "tom"), (2, "jim"), (3, "cassie"))
      val scoreList =
        List((1, 20), (1, 90), (1, 30), (2, 23), (2, 23), (2, 80), (3, 90), (3, 100), (3, 100))

      // Convert the local collections to RDDs.
      val stuRDD = sc.parallelize(stuList)
      val scoreRDD = sc.parallelize(scoreList)

      // join variant (disabled):
      // val joinRDD = stuRDD.join(scoreRDD)
      // joinRDD.collect().foreach { case (id, (name, score)) =>
      //   println("===========")
      //   println("id is " + id)
      //   println("name is " + name)
      //   println("score is " + score)
      // }

      // cogroup variant: one record per id with all names and all scores for that id.
      val groupedRDD = stuRDD.cogroup(scoreRDD)

      // Collect to the driver before printing: iterating the RDD directly runs println
      // inside the executors, so the output would not reach the driver's stdout on a
      // real cluster. The data set here is tiny, so collect() is safe.
      groupedRDD.collect().foreach { case (id, (names, scores)) =>
        println("===========")
        println("id is " + id)
        println("name is " + names)
        println("score is " + scores)
      }
    } finally {
      // Always release the context, even if the job fails.
      sc.stop()
    }
  }
}
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
/**
 * Scala example contrasting `join` and `cogroup` on pair RDDs keyed by student id.
 *
 * `join` produces every (name, score) combination for a key, whereas `cogroup`
 * produces a single record per key that groups all names and all scores together.
 * Only the `cogroup` variant is active; the `join` variant is kept as a commented-out example.
 */
object JoinAndCogroup {

  /**
   * Builds a local SparkContext, cogroups the student and score data sets, and prints
   * one section per student id.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("joinAndcogroup").setMaster("local[1]")
    // Obtain the context.
    val sc = new SparkContext(conf)
    try {
      // Sample data: (id, name) and (id, score) pairs.
      val stuList = List((1, "tom"), (2, "jim"), (3, "cassie"))
      val scoreList =
        List((1, 20), (1, 90), (1, 30), (2, 23), (2, 23), (2, 80), (3, 90), (3, 100), (3, 100))

      // Convert the local collections to RDDs.
      val stuRDD = sc.parallelize(stuList)
      val scoreRDD = sc.parallelize(scoreList)

      // join variant (disabled):
      // val joinRDD = stuRDD.join(scoreRDD)
      // joinRDD.collect().foreach { case (id, (name, score)) =>
      //   println("===========")
      //   println("id is " + id)
      //   println("name is " + name)
      //   println("score is " + score)
      // }

      // cogroup variant: one record per id with all names and all scores for that id.
      val groupedRDD = stuRDD.cogroup(scoreRDD)

      // Collect to the driver before printing: iterating the RDD directly runs println
      // inside the executors, so the output would not reach the driver's stdout on a
      // real cluster. The data set here is tiny, so collect() is safe.
      groupedRDD.collect().foreach { case (id, (names, scores)) =>
        println("===========")
        println("id is " + id)
        println("name is " + names)
        println("score is " + scores)
      }
    } finally {
      // Always release the context, even if the job fails.
      sc.stop()
    }
  }
}
相关文章推荐
- SSH Unexpected socket error:10106的解决办法
- CSS3中:nth-child()和nth-of-type()的区别
- SQL Server如何启用xp_cmdshell组件
- Yii 通过composer 安装的方法
- Yii2 捕获错误日志
- Yii2 捕获错误日志
- javascript中的this
- codeforces_676C. Vasya and String(二分)
- Iterator
- 安装oracle数据库 第一次登录使用
- 如何通过代码获取当前正在显示的控制器
- 56. Merge Intervals【H】【67】
- 断言(assert)的用法
- 捕获JS 错误日志
- 计算机原理-(个人理解-未完善)
- 类与对象
- 捕获JS 错误日志
- sim卡的EF(Elementary File:基本文件) 文件有何作用???
- SICP 练习1.37 计算黄金分割律
- spark中实现分组取topN