hadoop伪分布式环境apache版本切换为CDH
2017-03-13 07:32
429 查看
1.在/opt/modules下创建一个cdh空目录
$ mkdir cdh //cdh版本的hadoop, hive,sqoop安装在此目录下
2.解压cdh版本的hadoop,hive,sqoop
$ tar -zxf hadoop-2.5.0-cdh5.3.6.tar.gz -C /opt/modules/cdh
$ tar -zxf hive-0.13.1-cdh5.3.6.tar.gz -C /opt/modules/cdh
$ tar -zxf sqoop-1.4.5-cdh5.3.6.tar.gz -C /opt/modules/cdh
3.配置hadoop
1).${HADOOP_HOME}/etc/hadoop下
hadoop-env.sh mapred-env.sh yarn-env.sh
添加
export JAVA_HOME=/opt/modules/jdk1.7.0_79
2).${HADOOP_HOME}/etc/hadoop/core-site.xml(核心配置文件)
<!--NameNode地址,8020是NameNode的RPC端口,即HDFS的访问入口 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://[hostname]:8020</value>
</property>
<!--hadoop在运行时产生的文件 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/modules/cdh/hadoop-2.5.0-cdh5.3.6/data</value>
</property>
3).${HADOOP_HOME}/etc/hadoop/hdfs-site.xml (HDFS模块配置信息)
<!-- 存放在hdfs上的副本数,在伪分布式设置为1 -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- 关闭hdfs权限控制 -->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
4).${HADOOP_HOME}/etc/hadoop/yarn-site.xml
<!-- 指定ResourceManager所在服务器的主机名-->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>[hostname]</value>
</property>
<!-- 指明在执行MapReduce的时候使用shuffle-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!--启用日志聚合功能-->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!--日志保存时间-->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
</property>
5).${HADOOP_HOME}/etc/hadoop/复制并重命名模板文件
$ cp mapred-site.xml.template mapred-site.xml
<!-- 指定MapReduce基于Yarn来运行-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<!--配置实际的主机名和端口-->
<value>[hostname]:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>[hostname]:19888</value>
</property>
6).${HADOOP_HOME}/etc/hadoop/slaves
hadoop.beifeng.com
7).${HADOOP_HOME}/
格式化hdfs
## 注意:是cdh版本的hadoop下的hdfs命令
$ bin/hdfs namenode -format
8).${HADOOP_HOME}/启动hadoop
$ sbin/start-dfs.sh
$ sbin/start-yarn.sh
$ sbin/mr-jobhistory-daemon.sh start historyserver
4.hive部署
首先在${HIVE_HOME}/conf/重命名生成配置文件
$ cp hive-env.sh.template hive-env.sh
$ cp hive-default.xml.template hive-site.xml
$ cp hive-log4j.properties.template hive-log4j.properties
1).${HIVE_HOME}/conf/hive-env.sh
export JAVA_HOME=/opt/modules/jdk1.7.0_79
export HADOOP_HOME=/opt/modules/cdh/hadoop-2.5.0-cdh5.3.6
export HIVE_CONF_DIR=/opt/modules/cdh/hive-0.13.1-cdh5.3.6/conf
2).${HIVE_HOME}/conf/hive-site.xml
--JDBC 连接四要素
<!--132行 -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://[hostname]:3306/metastore?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<!--138行 -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<!--162行 -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>root123</value>
<description>password to use against metastore database</description>
</property>
===配置hiveserver2
<!--2183行 去掉5000L后面的“L”-->
<property>
<name>hive.server2.long.polling.timeout</name>
<value>5000</value>
<description>Time in milliseconds that HiveServer2 will wait, before responding to asynchronous calls that use long polling</description>
</property>
<!--2196行 -->
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
<description>Port number of HiveServer2 Thrift interface.
Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>[hostname]</value>
<description>Bind host on which to run the HiveServer2 Thrift interface.
Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST</description>
</property>
***<!--2786行少一个<property>标签 -->
3).${HIVE_HOME}/conf/hive-log4j.properties
hive.log.dir=/opt/modules/cdh/hive-0.13.1-cdh5.3.6/logs
4).## 拷贝驱动包到${HIVE_HOME}/lib
$ cp mysql-connector-java-5.1.34-bin.jar /opt/modules/cdh/hive-0.13.1-cdh5.3.6/lib/
5).在HDFS上创建相关目录并修改权限
$ bin/hdfs dfs -mkdir /tmp
$ bin/hdfs dfs -mkdir -p /user/hive/warehouse
$ bin/hdfs dfs -chmod g+w /tmp
$ bin/hdfs dfs -chmod g+w /user/hive/warehouse
5.Zookeeper安装
1)${ZOOKEEPER_HOME}/conf
$ cp zoo_sample.cfg zoo.cfg
2)${ZOOKEEPER_HOME}/conf
$ vi zoo.cfg
dataDir=/opt/modules/zookeeper-3.4.5/zkData
启动zookeeper
bin/zkServer.sh start
8967 QuorumPeerMain
##查看zookeeper的状态
bin/zkServer.sh status
$ mkdir cdh //cdh版本的hadoop, hive,sqoop安装在此目录下
2.解压cdh版本的hadoop,hive,sqoop
$ tar -zxf hadoop-2.5.0-cdh5.3.6.tar.gz -C /opt/modules/cdh
$ tar -zxf hive-0.13.1-cdh5.3.6.tar.gz -C /opt/modules/cdh
$ tar -zxf sqoop-1.4.5-cdh5.3.6.tar.gz -C /opt/modules/cdh
3.配置hadoop
1).${HADOOP_HOME}/etc/hadoop下
hadoop-env.sh mapred-env.sh yarn-env.sh
添加
export JAVA_HOME=/opt/modules/jdk1.7.0_79
2).${HADOOP_HOME}/etc/hadoop/core-site.xml(核心配置文件)
<!--NameNode地址,8020是NameNode的RPC端口,即HDFS的访问入口 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://[hostname]:8020</value>
</property>
<!--hadoop在运行时产生的文件 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/modules/cdh/hadoop-2.5.0-cdh5.3.6/data</value>
</property>
3).${HADOOP_HOME}/etc/hadoop/hdfs-site.xml (HDFS模块配置信息)
<!-- 存放在hdfs上的副本数,在伪分布式设置为1 -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- 关闭hdfs权限控制 -->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
4).${HADOOP_HOME}/etc/hadoop/yarn-site.xml
<!-- 指定ResourceManager所在服务器的主机名-->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>[hostname]</value>
</property>
<!-- 指明在执行MapReduce的时候使用shuffle-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!--启用日志聚合功能-->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!--日志保存时间-->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
</property>
5).${HADOOP_HOME}/etc/hadoop/复制并重命名模板文件
$ cp mapred-site.xml.template mapred-site.xml
<!-- 指定MapReduce基于Yarn来运行-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<!--配置实际的主机名和端口-->
<value>[hostname]:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>[hostname]:19888</value>
</property>
6).${HADOOP_HOME}/etc/hadoop/slaves
hadoop.beifeng.com
7).${HADOOP_HOME}/
格式化hdfs
## 注意:是cdh版本的hadoop下的hdfs命令
$ bin/hdfs namenode -format
8).${HADOOP_HOME}/启动hadoop
$ sbin/start-dfs.sh
$ sbin/start-yarn.sh
$ sbin/mr-jobhistory-daemon.sh start historyserver
4.hive部署
首先在${HIVE_HOME}/conf/重命名生成配置文件
$ cp hive-env.sh.template hive-env.sh
$ cp hive-default.xml.template hive-site.xml
$ cp hive-log4j.properties.template hive-log4j.properties
1).${HIVE_HOME}/conf/hive-env.sh
export JAVA_HOME=/opt/modules/jdk1.7.0_79
export HADOOP_HOME=/opt/modules/cdh/hadoop-2.5.0-cdh5.3.6
export HIVE_CONF_DIR=/opt/modules/cdh/hive-0.13.1-cdh5.3.6/conf
2).${HIVE_HOME}/conf/hive-site.xml
--JDBC 连接四要素
<!--132行 -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://[hostname]:3306/metastore?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<!--138行 -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<!--162行 -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>root123</value>
<description>password to use against metastore database</description>
</property>
===配置hiveserver2
<!--2183行 去掉5000L后面的“L”-->
<property>
<name>hive.server2.long.polling.timeout</name>
<value>5000</value>
<description>Time in milliseconds that HiveServer2 will wait, before responding to asynchronous calls that use long polling</description>
</property>
<!--2196行 -->
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
<description>Port number of HiveServer2 Thrift interface.
Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT</description>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>[hostname]</value>
<description>Bind host on which to run the HiveServer2 Thrift interface.
Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST</description>
</property>
***<!--2786行少一个<property>标签 -->
3).${HIVE_HOME}/conf/hive-log4j.properties
hive.log.dir=/opt/modules/cdh/hive-0.13.1-cdh5.3.6/logs
4).## 拷贝驱动包到${HIVE_HOME}/lib
$ cp mysql-connector-java-5.1.34-bin.jar /opt/modules/cdh/hive-0.13.1-cdh5.3.6/lib/
5).在HDFS上创建相关目录并修改权限
$ bin/hdfs dfs -mkdir /tmp
$ bin/hdfs dfs -mkdir -p /user/hive/warehouse
$ bin/hdfs dfs -chmod g+w /tmp
$ bin/hdfs dfs -chmod g+w /user/hive/warehouse
5.Zookeeper安装
1)${ZOOKEEPER_HOME}/conf
$ cp zoo_sample.cfg zoo.cfg
2)${ZOOKEEPER_HOME}/conf
$ vi zoo.cfg
dataDir=/opt/modules/zookeeper-3.4.5/zkData
启动zookeeper
bin/zkServer.sh start
8967 QuorumPeerMain
##查看zookeeper的状态
bin/zkServer.sh status
相关文章推荐
- hadoop2.6.0版本搭建伪分布式环境
- Apache/CDH 版本下 Hadoop 编译 Eclipse 插件总结
- hadoop2.7.0版本搭建伪分布式环境
- Hadoop安装(版本一)———Ubuntu16.04 下 hadoop的安装与配置(伪分布式环境)
- cdh-hadoop2.6.0伪分布式环境搭建
- CDH版本hadoop2.6伪分布式安装
- Hadoop cdh版本分布式集群搭建图文教程
- hadoop CDH3版和apache 0.20版本的对比
- macOS Sierra版本下Hadoop(2.7.3)为分布式环境的详细安装
- Hadoop 2.6.0-cdh5.4.0集群环境搭建和Apache-Hive、Sqoop的安装
- hadoop apache版本和cdh版本的区别和联系
- Apache spark2.1.0编译hadoop-2.6-cdh5.11.2的对应版本
- Apache Hadoop2.8.0伪分布式环境搭建
- Apache版hadoop环境搭建(伪分布式)
- hadoop2.6.0版本搭建伪分布式环境
- hadoop2.6.0版本搭建伪分布式环境
- Hadoop安装、Hadoop环境搭建(Apache)版本
- Apache Hadoop 分布式集群环境安装配置详细步骤
- Win下Apache+TortoiseSVN安装配置版本控制的环境
- SVN+Apache+AnkhSVN搭建版本控制环境