您的位置:首页 > 大数据 > Hadoop

hadoop的HA实现,超详细

2016-09-22 09:01 323 查看
本文转载自网络,但内容超详细,原文地址:http://blog.csdn.net/xiaojin21cen/article/details/42611073
我主要关注第三个问题:两个namenode是主、备关系,但是它们对应的datanode必须要求一致的吧。
问题导读:
1、hadoop ha是通过什么配置实现自动切换的?
2、配置中mapred与mapreduce的区别是什么?
3、hadoop ha两个namenode之间的关系是什么?

-- hadoop 版本:2.4.0
-- 安装包名:
hadoop-2.4.0.tar.gz 或者源码版本 hadoop-2.4.0-src.tar.gz(我hadoop、hbase、hive均是用的源码编译安装)

-- 安装参考: http://www.netfoucs.com/article/book_mmicky/79985.html http://www.byywee.com/page/M0/S934/934356.html http://www.itpub.net/thread-1631536-1-1.html http://demo.netfoucs.com/u014393917/article/details/25913363 http://www.aboutyun.com/thread-8294-1-1.html
-- 找不到本地库
参考:http://www.ercoppa.org/Linux-Com ... -hadoop-library.htm

-- lzo支持,
参考:http://blog.csdn.net/zhangzhaokun/article/details/17595325 http://slaytanic.blog.51cto.com/2057708/1162287/ http://hi.baidu.com/qingchunranzhi/item/3662ed5ed29d37a1adc85709

-- 安装以下RPM包:
yum -y install openssh*
yum -y install man*
yum -y install compat-libstdc++-33*
yum -y install libaio-0.*
yum -y install libaio-devel*
yum -y install sysstat-9.*
yum -y install glibc-2.*
yum -y install glibc-devel-2.* glibc-headers-2.*
yum -y install ksh-2*
yum -y install libgcc-4.*
yum -y install libstdc++-4.*
yum -y install libstdc++-4.*.i686*
yum -y install libstdc++-devel-4.*
yum -y install gcc-4.*x86_64*
yum -y install gcc-c++-4.*x86_64*
yum -y install elfutils-libelf-0*x86_64* elfutils-libelf-devel-0*x86_64*
yum -y install elfutils-libelf-0*i686* elfutils-libelf-devel-0*i686*
yum -y install libtool-ltdl*i686*
yum -y install ncurses*i686*
yum -y install ncurses*
yum -y install readline*
yum -y install unixODBC*
yum -y install zlib
yum -y install zlib*
yum -y install openssl*
yum -y install patch
yum -y install git
yum -y install lzo-devel zlib-devel gcc autoconf automake libtool
yum -y install lzop
yum -y install lrzsz
yum -y install lzo-devel zlib-devel gcc autoconf automake libtool
yum -y install nc
yum -y install glibc
yum -y install java-1.7.0-openjdk
yum -y install gzip
yum -y install zlib
yum -y install gcc
yum -y install gcc-c++
yum -y install make
yum -y install protobuf
yum -y install protobuf-compiler
yum -y install cmake
yum -y install openssl-devel
yum -y install ncurses-devel
yum -y install unzip
yum -y install telnet
yum -y install telnet-server
yum -y install wget
yum -y install svn
yum -y install ntpdate

-- hive 安装,参考:http://kicklinux.com/hive-deploy/

5台服务器设计图

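IP地址 | 主机名 | NameNode | JournalNode | DataNode | Zookeeper | Hbase | Hive
192.168.117.194 | funshion-hadoop194 | 是 | 是 | 否 | 是 | |
192.168.117.195 | funshion-hadoop195 | 是 | 是 | 否 | 是 | |
192.168.117.196 | funshion-hadoop196 | 否 | 是 | 是 | 是 | 是(Master) | 是(Mysql)
192.168.117.197 | funshion-hadoop197 | 否 | 是 | 是 | 是 | |
192.168.117.198 | funshion-hadoop198 | 否 | 是 | 是 | 是 | |
(注:原表格的单元格内容在转载时丢失,NameNode、JournalNode、DataNode、Zookeeper 各列根据下文配置文件补全;Hbase、Hive 两列仅保留原文可见的内容。)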
-- 配置Linux、安装JDK
--参考:linux(ubuntu)安装Java jdk环境变量设置及小程序测试

-- Step 1. 建立用户hadoop的ssh无密码登陆

--参考:
linux(ubuntu)无密码互通、相互登录高可靠文档
CentOS6.4之图解SSH无验证双向登陆配置

-- Step 2. zookeeper配置(配置奇数台zk集群,我用的5台)
-- 参考:Zookeeper集群环境安装过程详解

-- Step 3. Hadoop集群配置:

-- Step 3.1 vi $HADOOP_HOME/etc/hadoop/slaves

funshion-hadoop196
funshion-hadoop197
funshion-hadoop198

-- Step 3.2 vi $HADOOP_HOME/etc/hadoop/hadoop-env.sh (添加 JAVA_HOME 环境变量、本地library库)

export JAVA_HOME=/usr/java/latest
export LD_LIBRARY_PATH=/usr/local/hadoop/lzo/lib
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_PREFIX}/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_PREFIX/lib/native"

-- 注意:${HADOOP_PREFIX}/lib/native 下的内容如下:
[hadoop@funshion-hadoop194 native]$ pwd
/usr/local/hadoop/lib/native

[hadoop@funshion-hadoop194 native]$ ls -l
total 8640
-rw-r--r--. 1 hadoop hadoop 2850660 Jun 9 14:58 hadoop-common-2.4.0.jar
-rw-r--r--. 1 hadoop hadoop 1509888 Jun 9 14:58 hadoop-common-2.4.0-tests.jar
-rw-r--r--. 1 hadoop hadoop 178637 Jun 9 14:58 hadoop-lzo-0.4.20-SNAPSHOT.jar
-rw-r--r--. 1 hadoop hadoop 145385 Jun 9 14:58 hadoop-nfs-2.4.0.jar
-rw-r--r--. 1 hadoop hadoop 983042 Jun 6 19:36 libhadoop.a
-rw-r--r--. 1 hadoop hadoop 1487284 Jun 6 19:36 libhadooppipes.a
lrwxrwxrwx. 1 hadoop hadoop 18 Jun 6 19:42 libhadoop.so -> libhadoop.so.1.0.0
-rwxr-xr-x. 1 hadoop hadoop 586664 Jun 6 19:36 libhadoop.so.1.0.0
-rw-r--r--. 1 hadoop hadoop 582040 Jun 6 19:36 libhadooputils.a
-rw-r--r--. 1 hadoop hadoop 298178 Jun 6 19:36 libhdfs.a
lrwxrwxrwx. 1 hadoop hadoop 16 Jun 6 19:42 libhdfs.so -> libhdfs.so.0.0.0
-rwxr-xr-x. 1 hadoop hadoop 200026 Jun 6 19:36 libhdfs.so.0.0.0
drwxrwxr-x. 2 hadoop hadoop 4096 Jun 6 20:37 Linux-amd64-64

-- Step 3.3 vi $HADOOP_HOME/etc/hadoop/core-site.xml

-- (注意:fs.defaultFS参数在两个namenode节点均一样,即5台机器的core-site.xml文件内容完全一样)

<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa_nn2</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>funshion-hadoop194:2181,funshion-hadoop195:2181,funshion-hadoop196:2181,funshion-hadoop197:2181,funshion-hadoop198:2181</value>
</property>

<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/tmp</value>
<description>Abase for other temporary directories.</description>
</property>
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.native.lib</name>
<value>true</value>
</property>
<property>
<name>ha.zookeeper.session-timeout.ms</name>
<value>60000</value>
<description>ms</description>
</property>
<property>
<name>ha.failover-controller.cli-check.rpc-timeout.ms</name>
<value>60000</value>
</property>
<property>
<name>ipc.client.connect.timeout</name>
<value>20000</value>
</property>
</configuration>

-- 注意:属性dfs.ha.fencing.ssh.private-key-files的值id_rsa_nn2是私钥文件(即/home/hadoop/.ssh/目录下id_rsa文件的拷贝,且权限为600)
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa_nn2</value>
</property>

-- Step 3.4 vi $HADOOP_HOME/etc/hadoop/hdfs-site.xml

<configuration>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>funshion-hadoop194:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>funshion-hadoop195:8020</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.mycluster.nn1</name>
<value>funshion-hadoop194:53310</value>
</property>
<property>
<name>dfs.namenode.servicerpc-address.mycluster.nn2</name>
<value>funshion-hadoop195:53310</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>funshion-hadoop194:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>funshion-hadoop195:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://funshion-hadoop194:8485;funshion-hadoop195:8485;funshion-hadoop196:8485;funshion-hadoop197:8485;funshion-hadoop198:8485/mycluster</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/hadoop/mydata/journal</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>

<property>
<name>dfs.namenode.name.dir</name>
<value>file:///home/hadoop/mydata/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///home/hadoop/mydata/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.image.transfer.bandwidthPerSec</name>
<value>1048576</value>
</property>
</configuration>

-- Step 3.5 vi $HADOOP_HOME/etc/hadoop/mapred-site.xml

<configuration>
<property>
<name>mapreduce.jobhistory.address</name>
<value>funshion-hadoop194:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>funshion-hadoop194:19888</value>
</property>
<property>
<name>mapreduce.map.output.compress</name>
<value>true</value>
</property>
<property>
<name>mapreduce.map.output.compress.codec</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>mapred.child.env</name>
<value>LD_LIBRARY_PATH=/usr/local/hadoop/lib/native</value>
</property>
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx2048m</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx2048m</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx2048m</value>
</property>
<property>
<name>mapred.remote.os</name>
<value>Linux</value>
<description>Remote MapReduce framework's OS, can be either Linux or Windows</description>
</property>
</configuration>

-- 注意:1、以mapred.开头的形式去指定属性名,都是一种过时的形式,建议使用mapreduce.
比如:mapred.compress.map.output 属性应该对应修改成:mapreduce.map.output.compress
具体可以查阅:http://hadoop.apache.org/docs/r2.4.0/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml 文件,
      当然,好像还有少量属性名是没有修改的,比如:mapred.child.java.opts、mapred.child.env

-- 注意:/usr/local/hadoop/lib/native 目录下有如下内容:
[hadoop@funshion-hadoop194 sbin]$ ls -l /usr/local/hadoop/lib/native
total 12732
-rw-r--r-- 1 hadoop hadoop 2850900 Jun 20 19:22 hadoop-common-2.4.0.jar
-rw-r--r-- 1 hadoop hadoop 1509411 Jun 20 19:22 hadoop-common-2.4.0-tests.jar
-rw-r--r-- 1 hadoop hadoop 178559 Jun 20 18:38 hadoop-lzo-0.4.20-SNAPSHOT.jar
-rw-r--r-- 1 hadoop hadoop 1407039 Jun 20 19:25 hadoop-yarn-common-2.4.0.jar
-rw-r--r-- 1 hadoop hadoop 106198 Jun 20 18:37 libgplcompression.a
-rw-r--r-- 1 hadoop hadoop 1124 Jun 20 18:37 libgplcompression.la
-rwxr-xr-x 1 hadoop hadoop 69347 Jun 20 18:37 libgplcompression.so
-rwxr-xr-x 1 hadoop hadoop 69347 Jun 20 18:37 libgplcompression.so.0
-rwxr-xr-x 1 hadoop hadoop 69347 Jun 20 18:37 libgplcompression.so.0.0.0
-rw-r--r-- 1 hadoop hadoop 983042 Jun 20 18:10 libhadoop.a
-rw-r--r-- 1 hadoop hadoop 1487284 Jun 20 18:10 libhadooppipes.a
lrwxrwxrwx 1 hadoop hadoop 18 Jun 20 18:27 libhadoop.so -> libhadoop.so.1.0.0
-rwxr-xr-x 1 hadoop hadoop 586664 Jun 20 18:10 libhadoop.so.1.0.0
-rw-r--r-- 1 hadoop hadoop 582040 Jun 20 18:10 libhadooputils.a
-rw-r--r-- 1 hadoop hadoop 298178 Jun 20 18:10 libhdfs.a
lrwxrwxrwx 1 hadoop hadoop 16 Jun 20 18:27 libhdfs.so -> libhdfs.so.0.0.0
-rwxr-xr-x 1 hadoop hadoop 200026 Jun 20 18:10 libhdfs.so.0.0.0
-rw-r--r-- 1 hadoop hadoop 906318 Jun 20 19:17 liblzo2.a
-rwxr-xr-x 1 hadoop hadoop 929 Jun 20 19:17 liblzo2.la
-rwxr-xr-x 1 hadoop hadoop 562376 Jun 20 19:17 liblzo2.so
-rwxr-xr-x 1 hadoop hadoop 562376 Jun 20 19:17 liblzo2.so.2
-rwxr-xr-x 1 hadoop hadoop 562376 Jun 20 19:17 liblzo2.so.2.0.0

-- Step 3.6 vi $HADOOP_HOME/etc/hadoop/yarn-site.xml

<configuration>
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>60000</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rm-cluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.ha.id</name>
<value>rm1</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>funshion-hadoop194</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>funshion-hadoop195</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>funshion-hadoop194:2181,funshion-hadoop195:2181,funshion-hadoop196:2181,funshion-hadoop197:2181,funshion-hadoop198:2181</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>${yarn.resourcemanager.hostname.rm1}:23140</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>${yarn.resourcemanager.hostname.rm1}:23130</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm1</name>
<value>${yarn.resourcemanager.hostname.rm1}:23189</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>${yarn.resourcemanager.hostname.rm1}:23188</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>${yarn.resourcemanager.hostname.rm1}:23125</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>${yarn.resourcemanager.hostname.rm1}:23141</value>
</property>

<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>${yarn.resourcemanager.hostname.rm2}:23140</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>${yarn.resourcemanager.hostname.rm2}:23130</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm2</name>
<value>${yarn.resourcemanager.hostname.rm2}:23189</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>${yarn.resourcemanager.hostname.rm2}:23188</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>${yarn.resourcemanager.hostname.rm2}:23125</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>${yarn.resourcemanager.hostname.rm2}:23141</value>
</property>

<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<name>yarn.scheduler.fair.allocation.file</name>
<value>${yarn.home.dir}/etc/hadoop/fairscheduler.xml</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/home/hadoop/logs/yarn_local</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/home/hadoop/logs/yarn_log</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/home/hadoop/logs/yarn_remotelog</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/home/hadoop/logs/yarn_userstag</value>
</property>
<property>
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>/home/hadoop/logs/yarn_intermediatedone</value>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>/var/lib/hadoop/dfs/yarn_done</value>
</property>

<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>4.2</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>2</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<description>Classpath for typical applications.</description>
<name>yarn.application.classpath</name>
<value>
$HADOOP_HOME/etc/hadoop,
$HADOOP_HOME/share/hadoop/common/*,
$HADOOP_HOME/share/hadoop/common/lib/*,
$HADOOP_HOME/share/hadoop/hdfs/*,
$HADOOP_HOME/share/hadoop/hdfs/lib/*,
$HADOOP_HOME/share/hadoop/mapreduce/*,
$HADOOP_HOME/share/hadoop/mapreduce/lib/*,
$HADOOP_HOME/share/hadoop/yarn/*,
$HADOOP_HOME/share/hadoop/yarn/lib/*
</value>
</property>
</configuration>

-- 注意:两个namenode节点(同时也是两个ResourceManager节点)中,funshion-hadoop194直接用上面的配置,
-- funshion-hadoop195只需修改一个地方:将yarn.resourcemanager.ha.id属性值改为rm2
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  hadoop HA
相关文章推荐