Hadoop + Hive Installation Notes (reposted)
2012-09-06 17:50
Hive is a data warehouse tool built on Hadoop. It maps structured data files to database tables and provides full SQL query support, translating SQL statements into MapReduce jobs for execution. Its main advantage is the low learning curve: simple MapReduce statistics can be produced quickly with SQL-like statements, with no need to develop dedicated MapReduce applications, which makes it well suited to statistical analysis in a data warehouse.
[Network environment setup]
vim /etc/hosts
192.168.100.52 hadoop1
192.168.99.34 hadoop2
192.168.103.135 hadoop3
Then run the matching hostname command on each machine:
hostname hadoop1
hostname hadoop2
hostname hadoop3
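Note that hostname only lasts until the next reboot; on a RHEL/CentOS-style system you can persist it as well. A sketch, assuming the /etc/sysconfig/network layout (use the matching name on each node):
sed -i 's/^HOSTNAME=.*/HOSTNAME=hadoop1/' /etc/sysconfig/network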
[Passwordless SSH between the machines]
hadoop1# ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
hadoop1# scp ~/.ssh/id_dsa.pub hadoop2:/root/
hadoop1# scp ~/.ssh/id_dsa.pub hadoop3:/root/
hadoop2# cat id_dsa.pub >> ~/.ssh/authorized_keys
hadoop3# cat id_dsa.pub >> ~/.ssh/authorized_keys
Verification: logging in from hadoop1 to hadoop2 and hadoop3 should no longer require a password.
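A quick check: each of these should print the remote hostname without asking for a password:
hadoop1# ssh hadoop2 hostname
hadoop1# ssh hadoop3 hostname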
[Install Hadoop]
Make sure every machine has ssh, rsync, and a JDK.
Make sure the following is set:
export JAVA_HOME=/opt/soft/jdk
Hive has been tested extensively against Hadoop 0.20.x, so we use 0.20.
cd /opt/soft/
wget http://mirror.bjtu.edu.cn/apache/hadoop/core/hadoop-0.20.2/hadoop-0.20.2.tar.gz
tar -zxvf hadoop-0.20.2.tar.gz
cd hadoop-0.20.2/
vim ~/.bashrc
export HADOOP_HOME=/opt/soft/hadoop-0.20.2
(Repeat the steps above on the other two machines.)
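Optionally, add the JDK and Hadoop binaries to PATH so later commands need no full paths; a minimal ~/.bashrc sketch using the paths from this guide:
export JAVA_HOME=/opt/soft/jdk
export HADOOP_HOME=/opt/soft/hadoop-0.20.2
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin
Reload with source ~/.bashrc.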
[Configure Hadoop]
vim conf/core-site.xml
Change it to:
<configuration>
<property>
<!-- Default file system URI used by the dfs command module -->
<name>fs.default.name</name>
<value>hdfs://hadoop1:9000</value>
</property>
</configuration>
vim conf/hdfs-site.xml
Change it to:
<configuration>
<property>
<!-- Directory where DFS stores the file system namespace (metadata) -->
<name>dfs.name.dir</name>
<value>/opt/hadoop/data/dfs.name.dir</value>
</property>
<property>
<!-- Directory where DFS stores file data blocks -->
<name>dfs.data.dir</name>
<value>/opt/hadoop/data/dfs.data.dir</value>
</property>
<property>
<!-- Whether to enforce permissions on DFS files (usually false for testing) -->
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
vim conf/mapred-site.xml
Change it to:
<configuration>
<property>
<!-- Node that runs the JobTracker (usually the same host as the NameNode) -->
<name>mapred.job.tracker</name>
<value>hadoop1:9001</value>
</property>
<property>
<!-- MapReduce system directory (a path on HDFS) -->
<name>mapred.system.dir</name>
<value>/opt/hadoop/system/mapred.system.dir</value>
</property>
<property>
<!-- Local scratch directory for MapReduce (multiple comma-separated paths can spread disk I/O) -->
<name>mapred.local.dir</name>
<value>/opt/hadoop/data/mapred.local.dir</value>
</property>
</configuration>
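As the last comment above notes, mapred.local.dir accepts several comma-separated paths to spread disk I/O across drives; a sketch, where the /disk1 and /disk2 mount points are assumptions:
<property>
<name>mapred.local.dir</name>
<value>/disk1/hadoop/mapred.local.dir,/disk2/hadoop/mapred.local.dir</value>
</property>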
vim conf/masters
Content:
hadoop1
vim conf/slaves
Content:
hadoop2
hadoop3
scp conf/* hadoop2:/opt/soft/hadoop-0.20.2/conf/
scp conf/* hadoop3:/opt/soft/hadoop-0.20.2/conf/
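Equivalently, a small loop keeps the two copies in sync (assuming the same install path on every node):
for h in hadoop2 hadoop3; do scp conf/* $h:/opt/soft/hadoop-0.20.2/conf/; done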
[Initialization]
cd $HADOOP_HOME/bin
./hadoop namenode -format
Start:
./start-all.sh
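To confirm the daemons came up, run jps (shipped with the JDK) on each node; the expected processes depend on the node's role:
hadoop1# jps  # NameNode, SecondaryNameNode, JobTracker
hadoop2# jps  # DataNode, TaskTracker
hadoop3# jps  # DataNode, TaskTracker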
[Verification]
$HADOOP_HOME/bin/hadoop dfs -ls /
Open http://192.168.100.52:50030 (JobTracker web UI) and http://192.168.100.52:50070 (NameNode web UI).
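A write/read round trip through HDFS also makes a quick smoke test (the file name is arbitrary):
echo hello > /tmp/smoke.txt
$HADOOP_HOME/bin/hadoop dfs -put /tmp/smoke.txt /smoke.txt
$HADOOP_HOME/bin/hadoop dfs -cat /smoke.txt
$HADOOP_HOME/bin/hadoop dfs -rm /smoke.txt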
[Set up the Hive cluster]
Download:
Hive only needs to be installed on the hadoop1 machine.
cd /opt/soft/hadoop-0.20.2
wget http://mirror.bjtu.edu.cn/apache/hive/hive-0.7.0/hive-0.7.0.tar.gz
tar zxvf hive-0.7.0.tar.gz
cd hive-0.7.0
vim ~/.bashrc
export HIVE_HOME=/opt/soft/hadoop-0.20.2/hive-0.7.0
$HIVE_HOME/bin/hive
>create table tt(id int,name string) row format delimited fields terminated by ',' collection items terminated by "\n" stored as textfile;
>select * from tt;
>drop table tt;
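Between the select and the drop above, you can also load data to see actual rows come back; a sketch, where /tmp/tt.csv is a hypothetical two-line sample file (e.g. created with echo 1,alice > /tmp/tt.csv and echo 2,bob >> /tmp/tt.csv):
>load data local inpath '/tmp/tt.csv' into table tt;
>select * from tt;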
End of the quick test drive.
[Configure Hive]
Prepare MySQL on hadoop1 (user: hadoop, password: hadoop):
>create database hive;
>GRANT ALL ON hive.* TO 'hadoop'@'%' IDENTIFIED BY 'hadoop';
>FLUSH PRIVILEGES;
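It is worth confirming the grant works from the Hive host before going further:
hadoop1# mysql -h hadoop1 -u hadoop -phadoop -e 'show databases;'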
vim $HIVE_HOME/conf/hive-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hive.metastore.local</name>
<value>true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://hadoop1:3306/hive?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hadoop</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>hadoop</value>
</property>
</configuration>
[Start Hive]
Copy mysql-connector-java-5.1.10.jar into hive/lib, then:
$HIVE_HOME/bin/hive
>create table tt(id int,name string) row format delimited fields terminated by ',' collection items terminated by "\n" stored as textfile;
If the following error is reported:
FAILED: Error in metadata: javax.jdo.JDOException: Couldnt obtain a new sequence (unique id) : Binary logging not possible. Message: Transaction level 'READ-COMMITTED' in InnoDB is not safe for binlog mode 'STATEMENT'
Exit Hive, then connect to MySQL as root and run:
>set global binlog_format='MIXED';
This is a known MySQL issue: statement-based binary logging is not safe with the READ-COMMITTED transaction isolation that the Hive metastore uses.
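SET GLOBAL only lasts until MySQL restarts; to persist the fix, add it to my.cnf as well (assuming the standard /etc/my.cnf location):
[mysqld]
binlog_format=mixed
Installation complete.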