
Spark Source Code Analysis (Part 2): The Spark Startup Process

2017-08-15 20:36
After downloading Spark, start it by invoking the shell script:

./bin/spark-shell --master local[2]


The rest of this post traces that startup process. spark-shell launches --class org.apache.spark.repl.Main with the arguments --name "Spark shell" and the user's remaining arguments "$@", and it does so by handing everything to spark-submit.

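In other words, the spark-shell call above boils down to roughly the following spark-submit call (a sketch: whatever you pass to spark-shell is forwarded via "$@", here the --master local[2] from the example):

# What spark-shell effectively runs: spark-submit with the REPL's Main class
# as the application class and the user's arguments appended.
"${SPARK_HOME}"/bin/spark-submit \
  --class org.apache.spark.repl.Main \
  --name "Spark shell" \
  --master local[2]

The full spark-shell script is: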
cygwin=false

case "$(uname)" in

CYGWIN*) cygwin=true;;

esac


# Enter posix mode for bash

set -o posix


if [ -z "${SPARK_HOME}" ]; then

source "$(dirname "$0")"/find-spark-home

fi


export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]"


# SPARK-4161: scala does not assume use of the java classpath,

# so we need to add the "-Dscala.usejavacp=true" flag manually. We

# do this specifically for the Spark shell because the scala REPL

# has its own class loader, and any additional classpath specified

# through spark.driver.extraClassPath is not automatically propagated.

SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dscala.usejavacp=true"


function main() {

if $cygwin; then

# Workaround for issue involving JLine and Cygwin

# (see http://sourceforge.net/p/jline/bugs/40/).
# If you're using the Mintty terminal emulator in Cygwin, may need to set the

# "Backspace sends ^H" setting in "Keys" section of the Mintty options

# (see https://github.com/sbt/sbt/issues/562).
stty -icanon min 1 -echo > /dev/null 2>&1

export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"

## This is where the launch is actually triggered

"${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"

stty icanon echo > /dev/null 2>&1

else

export SPARK_SUBMIT_OPTS

"${SPARK_HOME}"/bin/spark-submit --class org.apache.spark.repl.Main --name "Spark shell" "$@"

fi

}


# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in

# binary distribution of Spark where Scala is not installed

exit_status=127

saved_stty=""


# restore stty settings (echo in particular)

function restoreSttySettings() {

stty $saved_stty

saved_stty=""

}


function onExit() {

if [[ "$saved_stty" != "" ]]; then

restoreSttySettings

fi

exit $exit_status

}


# to reenable echo if we are interrupted before completing.

trap onExit INT


# save terminal settings

saved_stty=$(stty -g 2>/dev/null)

# clear on error so we don't later try to restore them

if [[ ! $? ]]; then

saved_stty=""

fi


main "$@"


# record the exit status lest it be overwritten:

# then reenable echo and propagate the code.

exit_status=$?

onExit
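One detail worth noting before moving on: spark-shell records the terminal settings with stty -g before running main and restores them on exit, including on Ctrl-C via trap onExit INT, so an interrupted REPL does not leave the terminal with echo disabled. A minimal standalone sketch of that pattern, with a hypothetical long_running_command standing in for the spark-submit call:

#!/usr/bin/env bash
# Sketch of spark-shell's restore-TTY-on-exit pattern.
exit_status=127
saved_stty=""

restoreSttySettings() {
  stty "$saved_stty"          # put the terminal back exactly as we found it
  saved_stty=""
}

onExit() {
  if [[ -n "$saved_stty" ]]; then
    restoreSttySettings
  fi
  exit $exit_status
}

trap onExit INT               # Ctrl-C must also restore echo

saved_stty=$(stty -g 2>/dev/null) || saved_stty=""   # -g prints settings in a re-loadable form

long_running_command "$@"     # hypothetical placeholder for the real work
exit_status=$?

onExit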


spark-submit is simple: it just calls spark-class, passing org.apache.spark.deploy.SparkSubmit as the first argument:

if [ -z "${SPARK_HOME}" ]; then

source "$(dirname "$0")"/find-spark-home

fi


# disable randomized hash for string in Python 3.3+

export PYTHONHASHSEED=0


exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
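Since spark-submit does nothing but exec spark-class with org.apache.spark.deploy.SparkSubmit prepended (plus exporting PYTHONHASHSEED), the two invocations below reach the same entry point; this is only an illustration, not part of the startup path itself:

# Effectively interchangeable:
./bin/spark-submit --help
./bin/spark-class org.apache.spark.deploy.SparkSubmit --help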


spark-class is shown below. It mainly locates the Java binary and the Spark jars, then builds and execs the final JVM command:


if [ -z "${SPARK_HOME}" ]; then

source "$(dirname "$0")"/find-spark-home

fi


. "${SPARK_HOME}"/bin/load-spark-env.sh


# Find the java binary

if [ -n "${JAVA_HOME}" ]; then

RUNNER="${JAVA_HOME}/bin/java"

else

if [ "$(command -v java)" ]; then

RUNNER="java"

else

echo "JAVA_HOME is not set" >&2

exit 1

fi

fi


# Find Spark jars.

if [ -d "${SPARK_HOME}/jars" ]; then

SPARK_JARS_DIR="${SPARK_HOME}/jars"

else

SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars"

fi


if [ ! -d "$SPARK_JARS_DIR" ] && [ -z "$SPARK_TESTING$SPARK_SQL_TESTING" ]; then

echo "Failed to find Spark jars directory ($SPARK_JARS_DIR)." 1>&2

echo "You need to build Spark with the target \"package\" before running this program." 1>&2

exit 1

else

LAUNCH_CLASSPATH="$SPARK_JARS_DIR/*"

fi


# Add the launcher build dir to the classpath if requested.

if [ -n "$SPARK_PREPEND_CLASSES" ]; then

LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH"

fi


# For tests

if [[ -n "$SPARK_TESTING" ]]; then

unset YARN_CONF_DIR

unset HADOOP_CONF_DIR

fi


# The launcher library will print arguments separated by a NULL character, to allow arguments with

# characters that would be otherwise interpreted by the shell. Read that in a while loop, populating

# an array that will be used to exec the final command.

#

# The exit code of the launcher is appended to the output, so the parent shell removes it from the

# command array and checks the value to see if the launcher succeeded.

build_command() {

"$RUNNER" -Xmx128m -cp "$LAUNCH_CLASSPATH" org.apache.spark.launcher.Main "$@"

printf "%d\0" $?

}


# Turn off posix mode since it does not allow process substitution

set +o posix

CMD=()

while IFS= read -d '' -r ARG; do

CMD+=("$ARG")

done < <(build_command "$@")


COUNT=${#CMD[@]}

LAST=$((COUNT - 1))

LAUNCHER_EXIT_CODE=${CMD[$LAST]}


# Certain JVM failures result in errors being printed to stdout (instead of stderr), which causes

# the code that parses the output of the launcher to get confused. In those cases, check if the

# exit code is an integer, and if it's not, handle it as a special error case.

if ! [[ $LAUNCHER_EXIT_CODE =~ ^[0-9]+$ ]]; then

echo "${CMD[@]}" | head -n-1 1>&2

exit 1

fi


if [ $LAUNCHER_EXIT_CODE != 0 ]; then

exit $LAUNCHER_EXIT_CODE

fi


CMD=("${CMD[@]:0:$LAST}")

exec "${CMD[@]}"
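You can also invoke the launcher by hand to inspect the command it builds, turning the NULL separators into newlines. A sketch, assuming SPARK_HOME is set and the jars live under $SPARK_HOME/jars (when run through spark-class, build_command additionally appends its own exit status, which the parent strips off before exec'ing):

# Print the java command the launcher would build for spark-shell, one argument per line.
java -Xmx128m -cp "$SPARK_HOME/jars/*" org.apache.spark.launcher.Main \
  org.apache.spark.deploy.SparkSubmit \
  --class org.apache.spark.repl.Main --name "Spark shell" --master "local[2]" \
  | tr '\0' '\n'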


So the classes ultimately involved in the startup are:

org.apache.spark.repl.Main (the shell REPL itself)

org.apache.spark.deploy.SparkSubmit (the main class that spark-class runs)

org.apache.spark.launcher.Main (the helper that builds the final JVM command)
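For the ./bin/spark-shell --master local[2] example, the command that spark-class finally execs therefore looks roughly like this (the java path, memory setting, and argument order are illustrative and depend on your installation and configuration):

# Approximate shape of the final exec'ed command
/path/to/java/bin/java \
  -cp "${SPARK_HOME}/conf/:${SPARK_HOME}/jars/*" \
  -Dscala.usejavacp=true -Xmx1g \
  org.apache.spark.deploy.SparkSubmit \
  --master local[2] \
  --class org.apache.spark.repl.Main \
  --name "Spark shell" \
  spark-shell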
Tags: spark, source code