您的位置:首页 > 运维架构

Hadoop的hadoop-config.sh脚本详解

2015-11-25 16:27 633 查看
先简要说明一下我阅读该脚本时的环境:集群是CDH 5.3。在研究命令行中 hadoop jar ***.jar 命令的相关脚本时,我从 hadoop 脚本一路追到了 hadoop-config.sh 这个文件,所以下边的注解里加入了我当前环境下的实际处理情况。

# Work out where this script lives. Start from the path bash recorded for the
# file being read, falling back to $0 when BASH_SOURCE is unset.
this="${BASH_SOURCE-$0}"

# Physical directory (cd -P / pwd -P) that contains this script.
common_bin="$(
  cd -P -- "$(dirname -- "${this}")" && pwd -P
)"

# File name of this script without its directory part.
script="$(basename -- "${this}")"

# Rebuild "this" as <physical directory>/<file name>.
this="${common_bin}/${script}"

# Source the optional hadoop-layout.sh that sits next to this script.
# In the annotated CDH 5.3 environment that file is
# /opt/cloudera/parcels/CDH-5.3.0-1.cdh5.3.0.p0.30/lib/hadoop/libexec/hadoop-layout.sh
# and it initializes some basic settings.
if [ -f "${common_bin}/hadoop-layout.sh" ]; then
  . "${common_bin}/hadoop-layout.sh"
fi

# Directory variables for Hadoop common, HDFS, YARN and MapReduce.
# Each one keeps a value that is already set and non-empty; otherwise it
# receives the default shown here.
: "${HADOOP_COMMON_DIR:=share/hadoop/common}"
: "${HADOOP_COMMON_LIB_JARS_DIR:=share/hadoop/common/lib}"
: "${HADOOP_COMMON_LIB_NATIVE_DIR:=lib/native}"
: "${HDFS_DIR:=share/hadoop/hdfs}"
: "${HDFS_LIB_JARS_DIR:=share/hadoop/hdfs/lib}"
: "${YARN_DIR:=share/hadoop/yarn}"
: "${YARN_LIB_JARS_DIR:=share/hadoop/yarn/lib}"
: "${MAPRED_DIR:=share/hadoop/mapreduce}"
: "${MAPRED_LIB_JARS_DIR:=share/hadoop/mapreduce/lib}"
!!设置的值(本环境中这些变量已由 hadoop-layout.sh 预先赋值,所以上面 ${VAR:-默认值} 形式的默认值没有生效):
HADOOP_COMMON_DIR:./

HADOOP_COMMON_LIB_JARS_DIR:lib

HDFS_DIR:./

HDFS_LIB_JARS_DIR:lib

YARN_DIR:./

YARN_LIB_JARS_DIR:lib

MAPRED_DIR:./

MAPRED_LIB_JARS_DIR:lib

 #hadoop-layout.sh执行结束,继续执行hadoop-config.sh

# The root of the Hadoop installation.
# See HADOOP-6255 for directory structure layout
#
# The default is the physical parent directory of $common_bin. A HADOOP_PREFIX
# that is already set and non-empty takes precedence over that default.
HADOOP_DEFAULT_PREFIX="$(cd -P -- "${common_bin}/.." && pwd -P)"
: "${HADOOP_PREFIX:=${HADOOP_DEFAULT_PREFIX}}"
!!设置的值:
HADOOP_DEFAULT_PREFIX:/opt/cloudera/parcels/CDH-5.3.0-1.cdh5.3.0.p0.30/lib/hadoop

HADOOP_PREFIX:/opt/cloudera/parcels/CDH-5.3.0-1.cdh5.3.0.p0.30/lib/hadoop

# Check to see if the conf dir is given as an optional argument.
# Only a leading "--config <dir>" pair is handled, and only when more than one
# positional argument is present. Both words are shifted away and the
# directory becomes HADOOP_CONF_DIR. A path that is not a directory prints an
# error and exits with status 1.
# (In the annotated environment this script is called without arguments, so
# this section is skipped.)
if [[ $# -gt 1 && "$1" == "--config" ]]; then
  shift
  confdir=$1
  [ -d "$confdir" ] || {
    echo "Error: Cannot find configuration directory: $confdir"
    exit 1
  }
  shift
  HADOOP_CONF_DIR=$confdir
fi

# Allow alternate conf dir location.
# The default configuration directory, relative to HADOOP_PREFIX, is "conf"
# when ${HADOOP_PREFIX}/conf/hadoop-env.sh exists and "etc/hadoop" otherwise.
# A HADOOP_CONF_DIR that is already set and non-empty is kept as is.
# (In the annotated environment there is no conf/hadoop-env.sh, so etc/hadoop
# is used; there it is a link to /etc/hadoop/conf.)
DEFAULT_CONF_DIR="etc/hadoop"
if [ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]; then
  DEFAULT_CONF_DIR="conf"
fi

: "${HADOOP_CONF_DIR:=${HADOOP_PREFIX}/${DEFAULT_CONF_DIR}}"
export HADOOP_CONF_DIR

# User can specify hostnames or a file where the hostnames are (not both).
# HADOOP_SLAVES and HADOOP_SLAVE_NAMES must not both be non-empty; if they are,
# print an error and exit with status 1.
# (In the annotated environment both variables are empty.)
#
# The message is emitted by a single echo: the earlier form continued the
# command with backslashes that were followed by blank lines, which ended the
# command early and left the message strings to be run as commands.
if [[ -n "$HADOOP_SLAVES" && -n "$HADOOP_SLAVE_NAMES" ]]; then
  echo "Error: Please specify one variable HADOOP_SLAVES or HADOOP_SLAVE_NAMES and not both."
  exit 1
fi

# Process command line options that specify hosts or file with host list.
# Handled only when more than one positional argument is present:
#   --hosts <file>       exports HADOOP_SLAVES as ${HADOOP_CONF_DIR}/<file>
#   --hostnames <names>  exports HADOOP_SLAVE_NAMES as <names>
# The option and its value are shifted off the argument list.
# (In the annotated environment there are no arguments, so neither variable
# is set here.)
if [ $# -gt 1 ]; then
  case "$1" in
    --hosts)
      shift
      export HADOOP_SLAVES="${HADOOP_CONF_DIR}/$1"
      shift
      ;;
    --hostnames)
      shift
      export HADOOP_SLAVE_NAMES=$1
      shift
      ;;
  esac
fi

# User can specify hostnames or a file where the hostnames are (not both)
# (same check as above but now we know it's command line options that cause
# the problem).
# If HADOOP_SLAVES and HADOOP_SLAVE_NAMES are both non-empty, print an error
# and exit with status 1.
#
# The message is emitted by a single echo: the earlier form used a backslash
# continuation followed by a blank line, which ended the echo early and left
# the message string to be run as a command.
if [[ -n "$HADOOP_SLAVES" && -n "$HADOOP_SLAVE_NAMES" ]]; then
  echo "Error: Please specify one of --hosts or --hostnames options and not both."
  exit 1
fi

# Source hadoop-env.sh from the configuration directory when it is a regular
# file; nothing happens when it is missing.
# (In the annotated environment this is /etc/hadoop/conf/hadoop-env.sh, which
# sets some environment variables.)
[ ! -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ] || . "${HADOOP_CONF_DIR}/hadoop-env.sh"

# Check if net.ipv6.bindv6only is set to 1.
# The value is read with /sbin/sysctl (its error output is discarded). When a
# value was obtained, it equals 1, and HADOOP_ALLOW_IPV6 is not "yes", print
# the error below and exit with status 1.
bindv6only=$(/sbin/sysctl -n net.ipv6.bindv6only 2> /dev/null)

if [ -n "$bindv6only" ]; then
  if [ "$bindv6only" -eq "1" ] && [ "$HADOOP_ALLOW_IPV6" != "yes" ]; then
    echo "Error: \"net.ipv6.bindv6only\" is set to 1 - Java networking could be broken"
    echo "For more info: http://wiki.apache.org/hadoop/HadoopIPv6"
    exit 1
  fi
fi

# Newer versions of glibc use an arena memory allocator that causes virtual
# memory usage to explode. This interacts badly with the many threads that
# we use in Hadoop. Tune the variable down to prevent vmem explosion.
# A value that is already set and non-empty is kept; otherwise 4 is used.
: "${MALLOC_ARENA_MAX:=4}"
export MALLOC_ARENA_MAX

# Attempt to set JAVA_HOME if it is not set.
# On exit from this section JAVA_HOME is non-empty (otherwise the script has
# exited with status 1) and JAVA points at $JAVA_HOME/bin/java.
if [[ -z "$JAVA_HOME" ]]; then
  # On OSX use java_home (or /Library for older versions)
  if [ "Darwin" == "$(uname -s)" ]; then
    # JAVA_HOME is assigned as a plain string, not an array: bash does not
    # pass array variables to child processes, and an unquoted array
    # assignment would split a path that contains whitespace.
    if [ -x /usr/libexec/java_home ]; then
      JAVA_HOME="$(/usr/libexec/java_home)"
    else
      JAVA_HOME=/Library/Java/Home
    fi
    export JAVA_HOME
  fi

  # Bail if we did not detect it
  if [[ -z "$JAVA_HOME" ]]; then
    echo "Error: JAVA_HOME is not set and could not be found." 1>&2
    exit 1
  fi
fi

JAVA=$JAVA_HOME/bin/java

# Some Java parameters: the maximum JVM heap size option.
# The default is -Xmx1000m. A non-empty HADOOP_HEAPSIZE overrides the number,
# giving -Xmx<HADOOP_HEAPSIZE>m.
JAVA_HEAP_MAX="-Xmx${HADOOP_HEAPSIZE:-1000}m"

# CLASSPATH initially contains $HADOOP_CONF_DIR
CLASSPATH="${HADOOP_CONF_DIR}"

# Default HADOOP_COMMON_HOME to HADOOP_PREFIX when it is empty and the common
# directory exists under the prefix.
if [[ -z "$HADOOP_COMMON_HOME" && -d "${HADOOP_PREFIX}/${HADOOP_COMMON_DIR}" ]]; then
  export HADOOP_COMMON_HOME=$HADOOP_PREFIX
fi

# For releases, add core hadoop jar & webapps to CLASSPATH:
#   - the common directory itself, only when it contains a webapps directory
#   - "<common lib dir>/*", only when that directory exists
#   - "<common dir>/*", always
if [ -d "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}/webapps" ]; then
  CLASSPATH+=":${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}"
fi

if [ -d "${HADOOP_COMMON_HOME}/${HADOOP_COMMON_LIB_JARS_DIR}" ]; then
  CLASSPATH+=":${HADOOP_COMMON_HOME}/${HADOOP_COMMON_LIB_JARS_DIR}/*"
fi

CLASSPATH+=":${HADOOP_COMMON_HOME}/${HADOOP_COMMON_DIR}/*"

# Default log directory & file. Values that are already non-empty are kept.
HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-"$HADOOP_PREFIX/logs"}
HADOOP_LOGFILE=${HADOOP_LOGFILE:-hadoop.log}

# Default policy file for service-level authorization.
HADOOP_POLICYFILE=${HADOOP_POLICYFILE:-hadoop-policy.xml}

# Setup 'java.library.path' for native-hadoop code if necessary.
# When the native library directory exists under HADOOP_PREFIX it is appended
# to JAVA_LIBRARY_PATH, with a ':' separator only if JAVA_LIBRARY_PATH is
# already non-empty. A ${HADOOP_PREFIX}/build/native directory on its own
# changes nothing, so it is not tested here.
if [ -d "${HADOOP_PREFIX}/${HADOOP_COMMON_LIB_NATIVE_DIR}" ]; then
  JAVA_LIBRARY_PATH="${JAVA_LIBRARY_PATH:+${JAVA_LIBRARY_PATH}:}${HADOOP_PREFIX}/${HADOOP_COMMON_LIB_NATIVE_DIR}"
fi

# Setup a default TOOL_PATH; a value that is already non-empty is kept.
TOOL_PATH="${TOOL_PATH:-$HADOOP_PREFIX/share/hadoop/tools/lib/*}"

# Append the Hadoop system properties to HADOOP_OPTS: log directory and file,
# home directory, identity string, and root logger (INFO,console unless
# HADOOP_ROOT_LOGGER is non-empty).
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_PREFIX"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"

# When a native library path is known, hand it to the JVM and also append it
# to LD_LIBRARY_PATH.
if [ -n "$JAVA_LIBRARY_PATH" ]; then
  HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
  # Add the ':' separator only when LD_LIBRARY_PATH already has content. A
  # leading ':' would create an empty entry, which the dynamic loader treats
  # as the current working directory.
  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$JAVA_LIBRARY_PATH"
fi

HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"

# Disable ipv6 as it can cause issues
HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Put hdfs in classpath if present.
# HADOOP_HDFS_HOME defaults to HADOOP_PREFIX when it is empty and the HDFS
# directory exists under the prefix.
if [[ -z "$HADOOP_HDFS_HOME" && -d "${HADOOP_PREFIX}/${HDFS_DIR}" ]]; then
  export HADOOP_HDFS_HOME=$HADOOP_PREFIX
fi

# The HDFS directory itself is added only when it holds a webapps directory.
if [ -d "${HADOOP_HDFS_HOME}/${HDFS_DIR}/webapps" ]; then
  CLASSPATH+=":${HADOOP_HDFS_HOME}/${HDFS_DIR}"
fi

# "<hdfs lib dir>/*" is added only when that directory exists.
if [ -d "${HADOOP_HDFS_HOME}/${HDFS_LIB_JARS_DIR}" ]; then
  CLASSPATH+=":${HADOOP_HDFS_HOME}/${HDFS_LIB_JARS_DIR}/*"
fi

# "<hdfs dir>/*" is always added.
CLASSPATH+=":${HADOOP_HDFS_HOME}/${HDFS_DIR}/*"

# Put yarn in classpath if present.
# HADOOP_YARN_HOME defaults to HADOOP_PREFIX when it is empty and the YARN
# directory exists under the prefix.
if [[ -z "$HADOOP_YARN_HOME" && -d "${HADOOP_PREFIX}/${YARN_DIR}" ]]; then
  export HADOOP_YARN_HOME=$HADOOP_PREFIX
fi

# The YARN directory itself is added only when it holds a webapps directory.
if [ -d "${HADOOP_YARN_HOME}/${YARN_DIR}/webapps" ]; then
  CLASSPATH+=":${HADOOP_YARN_HOME}/${YARN_DIR}"
fi

# "<yarn lib dir>/*" is added only when that directory exists.
if [ -d "${HADOOP_YARN_HOME}/${YARN_LIB_JARS_DIR}" ]; then
  CLASSPATH+=":${HADOOP_YARN_HOME}/${YARN_LIB_JARS_DIR}/*"
fi

# "<yarn dir>/*" is always added.
CLASSPATH+=":${HADOOP_YARN_HOME}/${YARN_DIR}/*"

# Put mapred in classpath if present AND different from YARN.
# HADOOP_MAPRED_HOME defaults to HADOOP_PREFIX when it is empty and the
# MapReduce directory exists under the prefix.
if [[ -z "$HADOOP_MAPRED_HOME" && -d "${HADOOP_PREFIX}/${MAPRED_DIR}" ]]; then
  export HADOOP_MAPRED_HOME=$HADOOP_PREFIX
fi

# Nothing is added when the MapReduce directory path string is the same as the
# YARN one. Otherwise the entries follow the same rules as the other
# components: the directory itself only with a webapps subdirectory, the lib
# wildcard only when the lib directory exists, and the directory wildcard
# always.
if [[ "${HADOOP_MAPRED_HOME}/${MAPRED_DIR}" != "${HADOOP_YARN_HOME}/${YARN_DIR}" ]]; then
  if [ -d "${HADOOP_MAPRED_HOME}/${MAPRED_DIR}/webapps" ]; then
    CLASSPATH+=":${HADOOP_MAPRED_HOME}/${MAPRED_DIR}"
  fi

  if [ -d "${HADOOP_MAPRED_HOME}/${MAPRED_LIB_JARS_DIR}" ]; then
    CLASSPATH+=":${HADOOP_MAPRED_HOME}/${MAPRED_LIB_JARS_DIR}/*"
  fi

  CLASSPATH+=":${HADOOP_MAPRED_HOME}/${MAPRED_DIR}/*"
fi

# Add the user-specified CLASSPATH via HADOOP_CLASSPATH.
# Nothing happens when HADOOP_CLASSPATH is empty. Otherwise it is placed in
# front of CLASSPATH when HADOOP_USER_CLASSPATH_FIRST is non-empty, and
# appended after it when that variable is empty or unset.
if [[ -n "$HADOOP_CLASSPATH" ]]; then
  if [[ -n "$HADOOP_USER_CLASSPATH_FIRST" ]]; then
    # Prefix it if it is to be preceded
    CLASSPATH="${HADOOP_CLASSPATH}:${CLASSPATH}"
  else
    CLASSPATH+=":${HADOOP_CLASSPATH}"
  fi
fi
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息