MapReduce直接连接MySQL获取数据
2015-04-23 16:50
218 查看
MySQL中数据:
mysql> select * from linuxidc_tbls;
+---------------------+----------------+
| TBL_NAME | TBL_TYPE |
+---------------------+----------------+
| linuxidc_test_table | EXTERNAL_TABLE |
| linuxidc_t | MANAGED_TABLE |
| linuxidc_t1 | MANAGED_TABLE |
| tt | MANAGED_TABLE |
| tab_partition | MANAGED_TABLE |
| linuxidc_hbase_table_1 | MANAGED_TABLE |
| linuxidc_hbase_user_info | MANAGED_TABLE |
| t | EXTERNAL_TABLE |
| linuxidc_jobid | MANAGED_TABLE |
+---------------------+----------------+
9 rows in set (0.01 sec)
mysql> select * from linuxidc_tbls where TBL_NAME like 'linuxidc%' order by TBL_NAME;
+---------------------+----------------+
| TBL_NAME | TBL_TYPE |
+---------------------+----------------+
| linuxidc_hbase_table_1 | MANAGED_TABLE |
| linuxidc_hbase_user_info | MANAGED_TABLE |
| linuxidc_jobid | MANAGED_TABLE |
| linuxidc_t | MANAGED_TABLE |
| linuxidc_t1 | MANAGED_TABLE |
| linuxidc_test_table | EXTERNAL_TABLE |
+---------------------+----------------+
6 rows in set (0.00 sec)
MapReduce程序代码,ConnMysql.java:
package com.linuxidc.study;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;
import org.apache.Hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class ConnMysql {
private static Configuration conf = new Configuration();
static {
conf.addResource(new Path("F:/linuxidc-hadoop/hdfs-site.xml"));
conf.addResource(new Path("F:/linuxidc-hadoop/mapred-site.xml"));
conf.addResource(new Path("F:/linuxidc-hadoop/core-site.xml"));
conf.set("mapred.job.tracker", "10.133.103.21:50021");
}
public static class TblsRecord implements Writable, DBWritable {
String tbl_name;
String tbl_type;
public TblsRecord() {
}
@Override
public void write(PreparedStatement statement) throws SQLException {
// TODO Auto-generated method stub
statement.setString(1, this.tbl_name);
statement.setString(2, this.tbl_type);
}
@Override
public void readFields(ResultSet resultSet) throws SQLException {
// TODO Auto-generated method stub
this.tbl_name = resultSet.getString(1);
this.tbl_type = resultSet.getString(2);
}
@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
Text.writeString(out, this.tbl_name);
Text.writeString(out, this.tbl_type);
}
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
this.tbl_name = Text.readString(in);
this.tbl_type = Text.readString(in);
}
public String toString() {
return new String(this.tbl_name + " " + this.tbl_type);
}
}
public static class ConnMysqlMapper extends Mapper<LongWritable,TblsRecord,Text,Text> {
public void map(LongWritable key,TblsRecord values,Context context)
throws IOException,InterruptedException {
context.write(new Text(values.tbl_name), new Text(values.tbl_type));
}
}
public static class ConnMysqlReducer extends Reducer<Text,Text,Text,Text> {
public void reduce(Text key,Iterable<Text> values,Context context)
throws IOException,InterruptedException {
for(Iterator<Text> itr = values.iterator();itr.hasNext();) {
context.write(key, itr.next());
}
}
}
public static void main(String[] args) throws Exception {
Path output = new Path("/user/linuxidc/output/");
FileSystem fs = FileSystem.get(URI.create(output.toString()), conf);
if (fs.exists(output)) {
fs.delete(output);
}
//mysql的jdbc驱动
DistributedCache.addFileToClassPath(new Path(
"hdfs://hd022-test.nh.sdo.com/user/liuxiaowen/mysql-connector-java-5.1.13-bin.jar"), conf);
DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
"jdbc:mysql://10.133.103.22:3306/hive", "hive", "hive");
Job job = new Job(conf,"test mysql connection");
job.setJarByClass(ConnMysql.class);
job.setMapperClass(ConnMysqlMapper.class);
job.setReducerClass(ConnMysqlReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(DBInputFormat.class);
FileOutputFormat.setOutputPath(job, output);
//列名
String[] fields = { "TBL_NAME", "TBL_TYPE" };
//六个参数分别为:
//1.Job;2.Class<? extends DBWritable>
//3.表名;4.where条件
//5.order by语句;6.列名
DBInputFormat.setInput(job, TblsRecord.class,
"linuxidc_tbls", "TBL_NAME like 'linuxidc%'", "TBL_NAME", fields);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
运行结果:
[www.linuxidc.com @linux ~]$ hadoop fs -cat /user/linuxidc/output/part-r-00000
linuxidc_hbase_table_1 MANAGED_TABLE
linuxidc_hbase_user_info MANAGED_TABLE
linuxidc_jobid MANAGED_TABLE
linuxidc_t MANAGED_TABLE
linuxidc_t1 MANAGED_TABLE
linuxidc_test_table EXTERNAL_TABLE
注:DBInputFormat.setInput(JobConf job, Class&lt;? extends DBWritable&gt; inputClass, String tableName, String conditions, String orderBy, String... fieldNames),这个方法的参数很容易看懂:inputClass需实现DBWritable接口,
tableName是表名,conditions表示查询的where条件,orderBy表示排序的条件,fieldNames是字段列表,这相当于把sql语句拆分后的结果。当然也可以用完整sql语句的重载版本:setInput(JobConf job, Class&lt;? extends DBWritable&gt; inputClass, String inputQuery, String inputCountQuery)。
mysql> select * from linuxidc_tbls;
+---------------------+----------------+
| TBL_NAME | TBL_TYPE |
+---------------------+----------------+
| linuxidc_test_table | EXTERNAL_TABLE |
| linuxidc_t | MANAGED_TABLE |
| linuxidc_t1 | MANAGED_TABLE |
| tt | MANAGED_TABLE |
| tab_partition | MANAGED_TABLE |
| linuxidc_hbase_table_1 | MANAGED_TABLE |
| linuxidc_hbase_user_info | MANAGED_TABLE |
| t | EXTERNAL_TABLE |
| linuxidc_jobid | MANAGED_TABLE |
+---------------------+----------------+
9 rows in set (0.01 sec)
mysql> select * from linuxidc_tbls where TBL_NAME like 'linuxidc%' order by TBL_NAME;
+---------------------+----------------+
| TBL_NAME | TBL_TYPE |
+---------------------+----------------+
| linuxidc_hbase_table_1 | MANAGED_TABLE |
| linuxidc_hbase_user_info | MANAGED_TABLE |
| linuxidc_jobid | MANAGED_TABLE |
| linuxidc_t | MANAGED_TABLE |
| linuxidc_t1 | MANAGED_TABLE |
| linuxidc_test_table | EXTERNAL_TABLE |
+---------------------+----------------+
6 rows in set (0.00 sec)
MapReduce程序代码,ConnMysql.java:
package com.linuxidc.study;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;
import org.apache.Hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class ConnMysql {
private static Configuration conf = new Configuration();
static {
conf.addResource(new Path("F:/linuxidc-hadoop/hdfs-site.xml"));
conf.addResource(new Path("F:/linuxidc-hadoop/mapred-site.xml"));
conf.addResource(new Path("F:/linuxidc-hadoop/core-site.xml"));
conf.set("mapred.job.tracker", "10.133.103.21:50021");
}
public static class TblsRecord implements Writable, DBWritable {
String tbl_name;
String tbl_type;
public TblsRecord() {
}
@Override
public void write(PreparedStatement statement) throws SQLException {
// TODO Auto-generated method stub
statement.setString(1, this.tbl_name);
statement.setString(2, this.tbl_type);
}
@Override
public void readFields(ResultSet resultSet) throws SQLException {
// TODO Auto-generated method stub
this.tbl_name = resultSet.getString(1);
this.tbl_type = resultSet.getString(2);
}
@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
Text.writeString(out, this.tbl_name);
Text.writeString(out, this.tbl_type);
}
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
this.tbl_name = Text.readString(in);
this.tbl_type = Text.readString(in);
}
public String toString() {
return new String(this.tbl_name + " " + this.tbl_type);
}
}
public static class ConnMysqlMapper extends Mapper<LongWritable,TblsRecord,Text,Text> {
public void map(LongWritable key,TblsRecord values,Context context)
throws IOException,InterruptedException {
context.write(new Text(values.tbl_name), new Text(values.tbl_type));
}
}
public static class ConnMysqlReducer extends Reducer<Text,Text,Text,Text> {
public void reduce(Text key,Iterable<Text> values,Context context)
throws IOException,InterruptedException {
for(Iterator<Text> itr = values.iterator();itr.hasNext();) {
context.write(key, itr.next());
}
}
}
public static void main(String[] args) throws Exception {
Path output = new Path("/user/linuxidc/output/");
FileSystem fs = FileSystem.get(URI.create(output.toString()), conf);
if (fs.exists(output)) {
fs.delete(output);
}
//mysql的jdbc驱动
DistributedCache.addFileToClassPath(new Path(
"hdfs://hd022-test.nh.sdo.com/user/liuxiaowen/mysql-connector-java-5.1.13-bin.jar"), conf);
DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
"jdbc:mysql://10.133.103.22:3306/hive", "hive", "hive");
Job job = new Job(conf,"test mysql connection");
job.setJarByClass(ConnMysql.class);
job.setMapperClass(ConnMysqlMapper.class);
job.setReducerClass(ConnMysqlReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(DBInputFormat.class);
FileOutputFormat.setOutputPath(job, output);
//列名
String[] fields = { "TBL_NAME", "TBL_TYPE" };
//六个参数分别为:
//1.Job;2.Class<? extends DBWritable>
//3.表名;4.where条件
//5.order by语句;6.列名
DBInputFormat.setInput(job, TblsRecord.class,
"linuxidc_tbls", "TBL_NAME like 'linuxidc%'", "TBL_NAME", fields);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
运行结果:
[www.linuxidc.com @linux ~]$ hadoop fs -cat /user/linuxidc/output/part-r-00000
linuxidc_hbase_table_1 MANAGED_TABLE
linuxidc_hbase_user_info MANAGED_TABLE
linuxidc_jobid MANAGED_TABLE
linuxidc_t MANAGED_TABLE
linuxidc_t1 MANAGED_TABLE
linuxidc_test_table EXTERNAL_TABLE
注:DBInputFormat.setInput(JobConf job, Class&lt;? extends DBWritable&gt; inputClass, String tableName, String conditions, String orderBy, String... fieldNames),这个方法的参数很容易看懂:inputClass需实现DBWritable接口,
tableName是表名,conditions表示查询的where条件,orderBy表示排序的条件,fieldNames是字段列表,这相当于把sql语句拆分后的结果。当然也可以用完整sql语句的重载版本:setInput(JobConf job, Class&lt;? extends DBWritable&gt; inputClass, String inputQuery, String inputCountQuery)。
相关文章推荐
- MapReduce直接连接Mysql获取数据
- MapReduce直接连接Mysql获取数据 (新API写法)
- 使用R进行数据可视化套路之-多重散点图、连接Mysql获取数据
- JSP 连接 MySQL 数据库&获取数据库数据
- php7用mysqli连接mysql获取数据乱码问题
- ajax连接jsp或servlet,获取MySql为数据
- 如何用 ajax 连接mysql数据库,并且获取从中返回的数据。ajax获取从mysql返回的数据。responseXML分别输出不同数据的方法。
- Mysql 出现故障应用直接中断连接导致数据被锁(生产故障)详解
- 从数据库获取下拉菜单数据 原生连接mysql语句
- 直接连接*.mdf 文件 获取随机数据
- mysql 连接查询指的是将两张表或多张表关联到一起进行查询,获取一个表的行与另一个表的行匹配的数据。常见的连接查询包括内连接(等值连接)、左(外)连接、右(外)连接和交叉连接(完全连接)等
- java连接mysql获取数据保存为json格式
- ASP连接MYSQL并读取数据
- 使用mysql备份工具innobackupex将本地数据直接备份到远端服务器、备份、恢复操作实例
- nodejs同步调用获取mysql数据大坑
- MySql同时查询三个不关联的表,同时获取到三个表的数据,并按照某一字段排序
- JDBC连接mysql获取结果集行数
- 随机获取Mysql数据表的一条或多条记录
- MYSQL客户机程序4——在运行时获取连接参数
- android不能直接连接mysql