
Spark Core 2.0: Partition and HadoopPartition

2017-02-17 16:05
In Spark, Partition is a trait: it identifies a single partition within an RDD, and every concrete RDD type provides its own implementation. The trait only requires an index, which also serves as the default hashCode:

/**
 * An identifier for a partition in an RDD.
 */
trait Partition extends Serializable {
  /**
   * Get the partition's index within its parent RDD
   */
  def index: Int

  // A better default implementation of HashCode
  override def hashCode(): Int = index

  override def equals(other: Any): Boolean = super.equals(other)
}
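
As a minimal sketch (the SimplePartition class, the local[2] master, and the input data below are illustrative assumptions, not Spark source), a custom partition only needs to report its index, and the same contract can be observed on the partitions of any RDD:

import org.apache.spark.{Partition, SparkConf, SparkContext}

// Hypothetical minimal partition that carries nothing but its index,
// mirroring the contract of the Partition trait above.
class SimplePartition(override val index: Int) extends Partition

object PartitionDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("partition-demo").setMaster("local[2]"))

    // Every RDD exposes its partitions; each element's index matches its
    // position in the partitions array.
    val rdd = sc.parallelize(1 to 100, numSlices = 4)
    rdd.partitions.foreach(p => println(s"${p.getClass.getSimpleName} index=${p.index}"))

    sc.stop()
  }
}

HadoopRDD (the RDD behind sc.textFile and the old mapred API) uses its own implementation, HadoopPartition, defined in HadoopRDD.scala, which wraps a Hadoop InputSplit: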
/**
 * A Spark split class that wraps around a Hadoop InputSplit.
 */
private[spark] class HadoopPartition(rddId: Int, override val index: Int, s: InputSplit)
  extends Partition {

  val inputSplit = new SerializableWritable[InputSplit](s)

  override def hashCode(): Int = 31 * (31 + rddId) + index

  override def equals(other: Any): Boolean = super.equals(other)

  /**
   * Get any environment variables that should be added to the users environment when running pipes
   * @return a Map with the environment variables and corresponding values, it could be empty
   */
  def getPipeEnvVars(): Map[String, String] = {
    val envVars: Map[String, String] = if (inputSplit.value.isInstanceOf[FileSplit]) {
      val is: FileSplit = inputSplit.value.asInstanceOf[FileSplit]
      // map_input_file is deprecated in favor of mapreduce_map_input_file but set both
      // since it's not removed yet
      Map("map_input_file" -> is.getPath().toString(),
        "mapreduce_map_input_file" -> is.getPath().toString())
    } else {
      Map()
    }
    envVars
  }
}
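
The environment variables returned by getPipeEnvVars are exported to the child process by rdd.pipe() when the underlying split is a FileSplit. A rough sketch of how this surfaces (the input path /tmp/input.txt and the local[2] master are placeholder assumptions):

import org.apache.spark.{SparkConf, SparkContext}

object HadoopPartitionDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("hadoop-partition-demo").setMaster("local[2]"))

    // sc.textFile builds a HadoopRDD, so each partition wraps a Hadoop file split.
    val lines = sc.textFile("/tmp/input.txt") // placeholder path
    lines.partitions.foreach(p => println(s"${p.getClass.getSimpleName} index=${p.index}"))

    // Because the splits are FileSplits, pipe() places map_input_file and
    // mapreduce_map_input_file into the piped process's environment.
    val piped = lines.pipe(Seq("sh", "-c", "echo $mapreduce_map_input_file"))
    println(piped.distinct().collect().mkString("\n"))

    sc.stop()
  }
}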