
Spark--transform operator--mapPartitionsWithIndex

2017-07-18 21:52
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.ArrayBuffer

/**
  * Created by liupeng on 2017/6/15.
  */
object T_mapPartitionsWithIndex {

  System.setProperty("hadoop.home.dir", "F:\\hadoop-2.6.5")

  // Prefix every element with the index of the partition it belongs to.
  def fun_index(index: Int, iter: Iterator[String]): Iterator[String] = {
    val list = ArrayBuffer[String]()
    while (iter.hasNext) {
      val name: String = iter.next()
      val fs = index + ":" + name
      list += fs
      println(fs)
    }
    list.iterator
  }

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("mapPartitionsWithIndex_test").setMaster("local")
    val sc = new SparkContext(conf)

    // Prepare some sample data, split across 2 partitions.
    val names: List[String] = List("liupeng", "xuliuxi", "xiaoma")
    val nameRDD = sc.parallelize(names, 2)

    // Traverse each partition along with its index.
    // If you want to know which elements ended up in the same partition,
    // the mapPartitionsWithIndex operator gives you the index of each partition.
    val nameWithPartitionIndex = nameRDD.mapPartitionsWithIndex(fun_index)
    println(nameWithPartitionIndex.count())

    sc.stop()
  }
}

Output:

0:liupeng
1:xuliuxi
1:xiaoma
3
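
With three elements spread over two partitions, "liupeng" lands in partition 0 while "xuliuxi" and "xiaoma" land in partition 1, and count() returns 3. The same tagging can also be written inline with an anonymous function instead of a named helper. Below is a minimal sketch (the object name T_mapPartitionsWithIndex_inline is just for illustration; the data and local mode mirror the example above) that collects the tagged elements back to the driver rather than relying on println inside the partition function:

import org.apache.spark.{SparkConf, SparkContext}

object T_mapPartitionsWithIndex_inline {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("mapPartitionsWithIndex_inline").setMaster("local")
    val sc = new SparkContext(conf)

    val nameRDD = sc.parallelize(List("liupeng", "xuliuxi", "xiaoma"), 2)

    // Tag each element with the index of the partition it lives in;
    // the first argument of the function is the partition index.
    val tagged = nameRDD.mapPartitionsWithIndex((index, iter) => iter.map(name => index + ":" + name))

    // collect() brings the tagged elements back to the driver,
    // so they are printed locally rather than on the executors.
    tagged.collect().foreach(println)

    sc.stop()
  }
}

Collecting to the driver is fine for a toy data set like this; on real data you would keep the result distributed and apply further transformations instead.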