树莓派hadoop集群搭建
2016-06-24 11:31
453 查看
软件版本:
hadoop-2.6.4;hbase-0.98.20-hadoop2;zookeeper-3.4.6
使用的源:
deb http://mirrors.ustc.edu.cn/raspbian/raspbian/ jessie main contrib non-free rpi
deb-src http://mirrors.ustc.edu.cn/raspbian/raspbian/ jessie main contrib non-free rpi
结构:
主机名 IP 安装的软件 运行的进程 nna 192.168.11.81 jdk、hadoop NameNode、DFSZKFailoverController(zkfc) nns 192.168.11.82 jdk、hadoop NameNode、DFSZKFailoverController(zkfc) rma 192.168.11.83 jdk、hadoop ResourceManager rms 192.168.11.84 jdk、hadoop ResourceManager hba 192.168.11.85 jdk、hadoop、hbase HMaster hbs 192.168.11.86 jdk、hadoop、hbase HMaster dn1 192.168.11.91 jdk、hadoop、zookeeper、hbase DataNode、NodeManager、JournalNode、QuorumPeerMain、HRegionServer dn2 192.168.11.92 jdk、hadoop、zookeeper、hbase DataNode、NodeManager、JournalNode、QuorumPeerMain、HRegionServer dn3 192.168.11.93 jdk、hadoop、zookeeper、hbase DataNode、NodeManager、JournalNode、QuorumPeerMain、HRegionServer
1.创建hadoop用户(root下操作)
adduser hadoop
chmod +w /etc/sudoers
在/etc/sudoers中添加: hadoop ALL=(root) NOPASSWD:ALL
chmod -w /etc/sudoers
2.同步时间
sudo cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
3.U盘开机自动挂载
U盘格式为fat32(挂载类型为vfat)。uid为用户ID,gid为用户组ID,可用id命令查看
修改/etc/fstab,在末尾添加
/dev/sda1 /hadoop vfat suid,exec,dev,noatime,user,utf8,rw,auto,async,uid=1001,gid=1001 0 0
4.配置hosts
修改/etc/hosts,添加:
192.168.11.81 nna
192.168.11.82 nns
192.168.11.83 rma
192.168.11.84 rms
192.168.11.85 hba
192.168.11.86 hbs
192.168.11.91 dn1
192.168.11.92 dn2
192.168.11.93 dn3
修改/etc/hostname
nna
5.安装jdk
安装openjdk或oracle jdk:
sudo apt-cache search jdk
sudo apt-get install openjdk-8-jdk
sudo apt-get install oracle-java8-jdk
6.配置环境变量
修改/etc/profile,在末尾添加:
# set java environment
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/
export JRE_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/jre
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
# set hadoop environment
export HADOOP_HOME=/home/hadoop/hadoop-2.6.4
export PATH=$PATH:$HADOOP_HOME/bin
# set zookeeper environment
export ZK_HOME=/home/hadoop/zookeeper-3.4.6
export PATH=$PATH:$ZK_HOME/bin
# set hbase environment
export HBASE_HOME=/home/hadoop/hbase-0.98.20-hadoop2
export PATH=$PATH:$HBASE_HOME/bin
7.创建目录
mkdir -p /hadoop/tmp mkdir -p /hadoop/data/tmp/journal mkdir -p /hadoop/data/dfs/name mkdir -p /hadoop/data/dfs/data mkdir -p /hadoop/data/yarn/local mkdir -p /hadoop/data/zookeeper mkdir -p /hadoop/log/yarn
8.安装zookeeper
修改 ~/zookeeper-3.4.6/conf/zoo.cfg# The number of milliseconds of each tick # 服务器与客户端之间交互的基本时间单元(ms) tickTime=2000 # The number of ticks that the initial # synchronization phase can take # zookeeper所能接受的客户端数量 initLimit=10 # The number of ticks that can pass between # sending a request and getting an acknowledgement # 服务器和客户端之间请求和应答之间的时间间隔 syncLimit=5 # the directory where the snapshot is stored. # do not use /tmp for storage, /tmp here is just # example sakes. # 保存zookeeper数据,日志的路径 dataDir=/hadoop/data/zookeeper # the port at which the clients will connect # 客户端与zookeeper相互交互的端口 clientPort=2181 server.1=dn1:2888:3888 server.2=dn2:2888:3888 server.3=dn3:2888:3888 # server.A=B:C:D # 其中A是一个数字,代表这是第几号服务器;B是服务器的IP地址; # C表示服务器与群集中的“领导者”交换信息的端口;当领导者失效后,D表示用来执行选举时服务器相互通信的端口。 # the maximum number of client connections. # increase this if you need to handle more clients #maxClientCnxns=60 # # Be sure to read the maintenance section of the # administrator guide before turning on autopurge. # # http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance # # The number of snapshots to retain in dataDir #autopurge.snapRetainCount=3 # Purge task interval in hours # Set to "0" to disable auto purge feature #autopurge.purgeInterval=1
接下来,在dn节点的dataDir目录下创建一个myid文件,其中写入一个1-255之间的数字,
该数字必须与zoo.cfg中该节点对应的server.X配置的序号一致,
如:server.1=dn1:2888:3888,那么dn1节点下的myid文件中应该写上1
9.安装hadoop
修改/etc/hadoop/slaves,内容为:
dn1
dn2
dn3
修改/etc/hadoop/hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/
修改/etc/hadoop/yarn-env.sh
# some Java parameters
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/
修改/etc/hadoop/core-site.xml
<configuration> <!-- 指定hdfs的nameservice为cluster --> <property> <name>fs.defaultFS</name> <value>hdfs://cluster</value> </property> <property> <name>io.file.buffer.size</name> <value>65535</value> </property> <!-- 指定hadoop临时目录 --> <property> <name>hadoop.tmp.dir</name> <value>/hadoop/tmp</value> </property> <property> <name>hadoop.proxyuser.hduser.hosts</name> <value>*</value> </property> <property> <name>hadoop.proxyuser.hduser.groups</name> <value>*</value> </property> <!-- 指定zookeeper地址 --> <property> <name>ha.zookeeper.quorum</name> <value>dn1:2181,dn2:2181,dn3:2181</value> </property> </configuration>
修改/etc/hadoop/hdfs-site.xml
<configuration>
  <!-- 指定hdfs的nameservice为cluster,需要和core-site.xml中的保持一致 -->
  <property>
    <name>dfs.nameservices</name>
    <value>cluster</value>
  </property>
  <!-- cluster下面有两个NameNode,分别是nna,nns -->
  <property>
    <name>dfs.ha.namenodes.cluster</name>
    <value>nna,nns</value>
  </property>
  <!-- nna的RPC通信地址 -->
  <property>
    <name>dfs.namenode.rpc-address.cluster.nna</name>
    <value>nna:9000</value>
  </property>
  <!-- nns的RPC通信地址 -->
  <property>
    <name>dfs.namenode.rpc-address.cluster.nns</name>
    <value>nns:9000</value>
  </property>
  <!-- nna的http通信地址 -->
  <property>
    <name>dfs.namenode.http-address.cluster.nna</name>
    <value>nna:50070</value>
  </property>
  <!-- nns的http通信地址 -->
  <property>
    <name>dfs.namenode.http-address.cluster.nns</name>
    <value>nns:50070</value>
  </property>
  <!-- 指定NameNode的元数据在JournalNode上的存放位置 -->
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://dn1:8485;dn2:8485;dn3:8485/cluster</value>
  </property>
  <!-- 指定JournalNode在本地磁盘存放数据的位置 -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/hadoop/data/tmp/journal</value>
  </property>
  <!-- 开启NameNode失败自动切换 -->
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <!-- 配置失败自动切换实现方式 -->
  <property>
    <name>dfs.client.failover.proxy.provider.cluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <!-- 配置隔离机制方法,多个机制用换行分割,即每个机制暂用一行 -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <!-- 使用sshfence隔离机制时需要ssh免登陆 -->
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hadoop/.ssh/id_rsa</value>
  </property>
  <!-- 配置sshfence隔离机制超时时间 -->
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/hadoop/data/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/hadoop/data/dfs/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.journalnode.http-address</name>
    <value>0.0.0.0:8480</value>
  </property>
  <property>
    <name>dfs.journalnode.rpc-address</name>
    <value>0.0.0.0:8485</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>dn1:2181,dn2:2181,dn3:2181</value>
  </property>
</configuration>
修改/etc/hadoop/mapred-site.xml
<configuration> <!-- 指定mr框架为yarn方式 --> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> <property> <name>mapreduce.jobhistory.address</name> <value>nna:10020</value> </property> <property> <name>mapreduce.jobhistory.webapp.address</name> <value>nna:19888</value> </property> </configuration>
修改/etc/hadoop/yarn-site.xml
<configuration> <property> <name>yarn.resourcemanager.connect.retry-interval.ms</name> <value>2000</value> </property> <!-- 开启RM高可靠 --> <property> <name>yarn.resourcemanager.ha.enabled</name> <value>true</value> </property> <!-- 指定RM的名字 --> <property> <name>yarn.resourcemanager.ha.rm-ids</name> <value>rm1,rm2</value> </property> <property> <name>ha.zookeeper.quorum</name> <value>dn1:2181,dn2:2181,dn3:2181</value> </property> <property> <name>yarn.resourcemanager.ha.automatic-failover.enabled</name> <value>true</value> </property> <!-- 指定RM1的地址 --> <property> <name>yarn.resourcemanager.hostname.rm1</name> <value>nna</value> </property> <!-- 指定RM2的地址 --> <property> <name>yarn.resourcemanager.hostname.rm2</name> <value>nns</value> </property> <!--在namenode1上配置rm1,在namenode2上配置rm2,注意:一般都喜欢把配置好的文件远程复制到其它机器上,但这个在YARN的另一个机器上一定要修改 --> <property> <name>yarn.resourcemanager.ha.id</name> <value>rm1</value> </property> <!--开启自动恢复功能 --> <property> <name>yarn.resourcemanager.recovery.enabled</name> <value>true</value> </property> <!--配置与zookeeper的连接地址 --> <property> <name>yarn.resourcemanager.zk-state-store.address</name> <value>dn1:2181,dn2:2181,dn3:2181</value> </property> <property> <name>yarn.resourcemanager.store.class</name> <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value> </property> <!-- 指定zk集群地址 --> <property> <name>yarn.resourcemanager.zk-address</name> <value>dn1:2181,dn2:2181,dn3:2181</value> </property> <!-- 指定RM的cluster id --> <property> <name>yarn.resourcemanager.cluster-id</name> <value>cluster1-yarn</value> </property> <!--schelduler失联等待连接时间 --> <property> <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name> <value>5000</value> </property> <!--配置rm1 --> <property> <name>yarn.resourcemanager.address.rm1</name> <value>nna:8132</value> </property> <property> <name>yarn.resourcemanager.scheduler.address.rm1</name> <value>nna:8130</value> </property> <property> 
<name>yarn.resourcemanager.webapp.address.rm1</name> <value>nna:8188</value> </property> <property> <name>yarn.resourcemanager.resource-tracker.address.rm1</name> <value>nna:8131</value> </property> <property> <name>yarn.resourcemanager.admin.address.rm1</name> <value>nna:8033</value> </property> <property> <name>yarn.resourcemanager.ha.admin.address.rm1</name> <value>nna:23142</value> </property> <!--配置rm2 --> <property> <name>yarn.resourcemanager.address.rm2</name> <value>nns:8132</value> </property> <property> <name>yarn.resourcemanager.scheduler.address.rm2</name> <value>nns:8130</value> </property> <property> <name>yarn.resourcemanager.webapp.address.rm2</name> <value>nns:8188</value> </property> <property> <name>yarn.resourcemanager.resource-tracker.address.rm2</name> <value>nns:8131</value> </property> <property> <name>yarn.resourcemanager.admin.address.rm2</name> <value>nns:8033</value> </property> <property> <name>yarn.resourcemanager.ha.admin.address.rm2</name> <value>nns:23142</value> </property> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.nodemanager.local-dirs</name> <value>/home/hadoop/data/yarn/local</value> </property> <property> <name>yarn.nodemanager.log-dirs</name> <value>/home/hadoop/log/yarn</value> </property> <property> <name>mapreduce.shuffle.port</name> <value>23080</value> </property> <!--故障处理类 --> <property> <name>yarn.client.failover-proxy-provider</name> <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value> </property> <property> <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name> <value>/yarn-leader-election</value> </property> </configuration>
10.安装hbase
覆盖hbase中lib文件夹下的 zookeeper*.jar 文件
rm -rf hbase-0.98.20-hadoop2/lib/zookeeper*.jar
find zookeeper-3.4.6/ -name "zookeeper*.jar" | xargs -i cp {} hbase-0.98.20-hadoop2/lib/
覆盖hbase中lib文件夹下的 hadoop*.jar 文件
rm -rf hbase-0.98.20-hadoop2/lib/hadoop*.jar find hadoop-2.6.4/share/hadoop -name "hadoop*.jar" | xargs -i cp {} hbase-0.98.20-hadoop2/lib/
修改conf/hbase-env.sh
export JAVA_HOME=/usr/lib/jvm/jdk-8-oracle-arm32-vfp-hflt/
export HBASE_MANAGES_ZK=false   # HBase是否管理它自己的ZooKeeper实例,false表示使用外部ZooKeeper
修改conf/regionservers
dn1 dn2 dn3
修改conf/hbase-site.xml
$HBASE_HOME/conf/hbase-site.xml的hbase.rootdir的主机和端口号与$HADOOP_HOME/conf/core-site.xml的fs.default.name的主机和端口号一致
<configuration>
  <property>
    <name>hbase.rootdir</name>
    <!-- HA下主机和端口号需与core-site.xml中fs.defaultFS(hdfs://cluster)保持一致 -->
    <value>hdfs://cluster/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
    <description>The mode the cluster will be in. Possible values are false: standalone and pseudo-distributed setups with managed Zookeeper true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh) </description>
  </property>
  <property>
    <name>hbase.master</name>
    <value>nna:60000</value>
  </property>
  <property>
    <name>hbase.master.port</name>
    <value>60000</value>
    <description>The port master should bind to.</description>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>dn1:2181,dn2:2181,dn3:2181</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/hadoop/data/zookeeper</value>
    <description>Property from ZooKeeper config zoo.cfg. The directory where the snapshot is stored. </description>
  </property>
</configuration>
11.备份镜像,并刻录至各个节点
修改dn节点下的dataDir目录下的myid文件
12.配置免密码登陆
ssh-keygen -t rsa
ssh-copy-id -i ~/.ssh/id_rsa.pub nna
ssh-copy-id -i ~/.ssh/id_rsa.pub nns
ssh-copy-id -i ~/.ssh/id_rsa.pub dn1
ssh-copy-id -i ~/.ssh/id_rsa.pub dn2
ssh-copy-id -i ~/.ssh/id_rsa.pub dn3
13.初始化并启动各个模块
//------------------------------------------------------------------------方案一
启动zookeeper
在 dn1、dn2、dn3上启动
#./zookeeper-3.4.6/bin/zkServer.sh start #./zookeeper-3.4.6/bin/zkServer.sh restart
在 dn1、dn2、dn3上查看状态:一个leader,两个follower
#./zookeeper-3.4.6/bin/zkServer.sh status
在 dn1、dn2、dn3上启动
#./hadoop-2.6.4/sbin/hadoop-daemon.sh start journalnode
在 nna 上格式化hdfs
hadoop namenode -format
格式化后会在根据core-site.xml中的hadoop.tmp.dir配置生成个文件
拷贝至nns、dn1、dn2、dn3
scp -r /hadoop/data/dfs/name/current hadoop@nns:/hadoop/data/dfs/name/current scp -r /hadoop/data/dfs/name/current hadoop@dn1:/hadoop/data/dfs/name/current scp -r /hadoop/data/dfs/name/current hadoop@dn2:/hadoop/data/dfs/name/current scp -r /hadoop/data/dfs/name/current hadoop@dn3:/hadoop/data/dfs/name/current
在 nna、nns上格式化ZK
#hdfs zkfc -formatZK
在 nna 上启动HDFS
#./hadoop-2.6.4/sbin/start-dfs.sh
启动rma的YARN
#./hadoop-2.6.4/sbin/start-yarn.sh
启动rms的YARN
#./hadoop-2.6.4/sbin/yarn-daemon.sh start resourcemanager
启动hbase
在hba上启动hbase
start-hbase.sh
在hbs上启动hbase
hbase-daemon.sh start master
//------------------------------------------------------------------------
方案二
启动zookeeper
在 dn1、dn2、dn3上启动
#./zookeeper-3.4.6/bin/zkServer.sh start #./zookeeper-3.4.6/bin/zkServer.sh restart
在 dn1、dn2、dn3上查看状态:一个leader,两个follower
#./zookeeper-3.4.6/bin/zkServer.sh status
在 dn1、dn2、dn3上启动
#./hadoop-2.6.4/sbin/hadoop-daemon.sh start journalnode
格式化nna的NameNode
hdfs namenode -format
启动nna的NameNode
#./hadoop-2.6.4/sbin/hadoop-daemon.sh start namenode
格式化nns的NameNode
hdfs namenode -bootstrapStandby
启动nns的NameNode
#./hadoop-2.6.4/sbin/hadoop-daemon.sh start namenode
在nna转换active
hdfs haadmin -transitionToActive nna
在nna启动DataNodes
#./hadoop-2.6.4/sbin/hadoop-daemons.sh start datanode
切换nna、nns角色
hdfs haadmin -failover --forceactive nna nns
启动rma的YARN
#./hadoop-2.6.4/sbin/start-yarn.sh
启动rms的YARN
#./hadoop-2.6.4/sbin/yarn-daemon.sh start resourcemanager
启动hbase
在hba上启动hbase
start-hbase.sh
在hbs上启动hbase
hbase-daemon.sh start master
//------------------------------------------------------------------------
14.关闭集群
在hbs上关闭hbase
hbase-daemon.sh stop master
在hba上关闭hbase
stop-hbase.sh
关闭rms的YARN
#./hadoop-2.6.4/sbin/yarn-daemon.sh stop resourcemanager
关闭rma的YARN
#./hadoop-2.6.4/sbin/stop-yarn.sh
在 nna 上关闭HDFS
#./hadoop-2.6.4/sbin/stop-dfs.sh
在 dn1、dn2、dn3上关闭zookeeper
#./zookeeper-3.4.6/bin/zkServer.sh stop
15.再次启动
在 dn1、dn2、dn3上启动zookeeper
#./zookeeper-3.4.6/bin/zkServer.sh start
在 dn1、dn2、dn3上查看状态:一个leader,两个follower
#./zookeeper-3.4.6/bin/zkServer.sh status
在 dn1、dn2、dn3上启动
#./hadoop-2.6.4/sbin/hadoop-daemon.sh start journalnode
在 nna 上启动HDFS
#./hadoop-2.6.4/sbin/start-dfs.sh
启动rma的YARN
#./hadoop-2.6.4/sbin/start-yarn.sh
启动rms的YARN
#./hadoop-2.6.4/sbin/yarn-daemon.sh start resourcemanager
在hba上启动hbase
start-hbase.sh
在hbs上启动hbase
hbase-daemon.sh start master
16.验证
http://nna:50070  http://nns:50070  http://192.168.11.81:8188  http://192.168.11.82:8188  http://hba:60010  http://hbs:60010
19888
17.增加节点
相关文章推荐
- 详解HDFS Short Circuit Local Reads
- Hadoop_2.1.0 MapReduce序列图
- 使用Hadoop搭建现代电信企业架构
- 单机版搭建Hadoop环境图文教程详解
- hadoop常见错误以及处理方法详解
- hadoop 单机安装配置教程
- hadoop的hdfs文件操作实现上传文件到hdfs
- hadoop实现grep示例分享
- Apache Hadoop版本详解
- linux下搭建hadoop环境步骤分享
- hadoop client与datanode的通信协议分析
- hadoop中一些常用的命令介绍
- Hadoop单机版和全分布式(集群)安装
- 用PHP和Shell写Hadoop的MapReduce程序
- hadoop map-reduce中的文件并发操作
- Hadoop1.2中配置伪分布式的实例
- hadoop上传文件功能实例代码
- java结合HADOOP集群文件上传下载
- Hadoop 2.x伪分布式环境搭建详细步骤
- Java访问Hadoop分布式文件系统HDFS的配置说明