
Fetching Data from MySQL Directly in MapReduce

The data in MySQL:

mysql> select * from linuxidc_tbls;
+--------------------------+----------------+
| TBL_NAME                 | TBL_TYPE       |
+--------------------------+----------------+
| linuxidc_test_table      | EXTERNAL_TABLE |
| linuxidc_t               | MANAGED_TABLE  |
| linuxidc_t1              | MANAGED_TABLE  |
| tt                       | MANAGED_TABLE  |
| tab_partition            | MANAGED_TABLE  |
| linuxidc_hbase_table_1   | MANAGED_TABLE  |
| linuxidc_hbase_user_info | MANAGED_TABLE  |
| t                        | EXTERNAL_TABLE |
| linuxidc_jobid           | MANAGED_TABLE  |
+--------------------------+----------------+
9 rows in set (0.01 sec)

mysql> select * from linuxidc_tbls where TBL_NAME like 'linuxidc%' order by TBL_NAME;
+--------------------------+----------------+
| TBL_NAME                 | TBL_TYPE       |
+--------------------------+----------------+
| linuxidc_hbase_table_1   | MANAGED_TABLE  |
| linuxidc_hbase_user_info | MANAGED_TABLE  |
| linuxidc_jobid           | MANAGED_TABLE  |
| linuxidc_t               | MANAGED_TABLE  |
| linuxidc_t1              | MANAGED_TABLE  |
| linuxidc_test_table      | EXTERNAL_TABLE |
+--------------------------+----------------+
6 rows in set (0.00 sec)

The MapReduce job below reproduces this second query with DBInputFormat and writes the result to HDFS. The program, ConnMysql.java:

package com.linuxidc.study;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ConnMysql {

    private static Configuration conf = new Configuration();

    static {
        conf.addResource(new Path("F:/linuxidc-hadoop/hdfs-site.xml"));
        conf.addResource(new Path("F:/linuxidc-hadoop/mapred-site.xml"));
        conf.addResource(new Path("F:/linuxidc-hadoop/core-site.xml"));
        conf.set("mapred.job.tracker", "10.133.103.21:50021");
    }

    /**
     * One record of the linuxidc_tbls table. DBWritable maps the record
     * to and from JDBC; Writable serializes it between map and reduce.
     */
    public static class TblsRecord implements Writable, DBWritable {
        String tbl_name;
        String tbl_type;

        public TblsRecord() {
        }

        @Override
        public void write(PreparedStatement statement) throws SQLException {
            statement.setString(1, this.tbl_name);
            statement.setString(2, this.tbl_type);
        }

        @Override
        public void readFields(ResultSet resultSet) throws SQLException {
            this.tbl_name = resultSet.getString(1);
            this.tbl_type = resultSet.getString(2);
        }

        @Override
        public void write(DataOutput out) throws IOException {
            Text.writeString(out, this.tbl_name);
            Text.writeString(out, this.tbl_type);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            this.tbl_name = Text.readString(in);
            this.tbl_type = Text.readString(in);
        }

        @Override
        public String toString() {
            return this.tbl_name + " " + this.tbl_type;
        }
    }

    public static class ConnMysqlMapper
            extends Mapper<LongWritable, TblsRecord, Text, Text> {
        public void map(LongWritable key, TblsRecord values, Context context)
                throws IOException, InterruptedException {
            context.write(new Text(values.tbl_name), new Text(values.tbl_type));
        }
    }

    public static class ConnMysqlReducer extends Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Iterator<Text> itr = values.iterator(); itr.hasNext();) {
                context.write(key, itr.next());
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Path output = new Path("/user/linuxidc/output/");

        FileSystem fs = FileSystem.get(URI.create(output.toString()), conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        // Ship the MySQL JDBC driver (already uploaded to HDFS) to the task classpath
        DistributedCache.addFileToClassPath(new Path(
                "hdfs://hd022-test.nh.sdo.com/user/liuxiaowen/mysql-connector-java-5.1.13-bin.jar"), conf);

        DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://10.133.103.22:3306/hive", "hive", "hive");

        Job job = new Job(conf, "test mysql connection");
        job.setJarByClass(ConnMysql.class);

        job.setMapperClass(ConnMysqlMapper.class);
        job.setReducerClass(ConnMysqlReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(DBInputFormat.class);
        FileOutputFormat.setOutputPath(job, output);

        // Columns to read
        String[] fields = { "TBL_NAME", "TBL_TYPE" };
        // The six arguments are:
        // 1. the Job; 2. the Class<? extends DBWritable>;
        // 3. the table name; 4. the WHERE conditions;
        // 5. the ORDER BY clause; 6. the column names
        DBInputFormat.setInput(job, TblsRecord.class,
                "linuxidc_tbls", "TBL_NAME like 'linuxidc%'", "TBL_NAME", fields);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
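Before submitting the job, it can save a debugging round-trip to confirm that the connection parameters passed to DBConfiguration.configureDB() work outside of Hadoop. A minimal standalone check, assuming the same driver, URL, and credentials as in main() and the connector jar on the local classpath (the LIMIT 3 probe query is just an illustration):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class ConnCheck {
    public static void main(String[] args) throws Exception {
        // Same driver/URL/user/password as passed to DBConfiguration.configureDB()
        Class.forName("com.mysql.jdbc.Driver");
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://10.133.103.22:3306/hive", "hive", "hive");
        try {
            Statement stmt = conn.createStatement();
            ResultSet rs = stmt.executeQuery(
                    "SELECT TBL_NAME, TBL_TYPE FROM linuxidc_tbls LIMIT 3");
            while (rs.next()) {
                System.out.println(rs.getString(1) + " " + rs.getString(2));
            }
        } finally {
            conn.close();
        }
    }
}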

The result of running the job:

[www.linuxidc.com@linux ~]$ hadoop fs -cat /user/linuxidc/output/part-r-00000
linuxidc_hbase_table_1 MANAGED_TABLE
linuxidc_hbase_user_info MANAGED_TABLE
linuxidc_jobid MANAGED_TABLE
linuxidc_t MANAGED_TABLE
linuxidc_t1 MANAGED_TABLE
linuxidc_test_table EXTERNAL_TABLE

Note: the signature DBInputFormat.setInput(Job job, Class<? extends DBWritable> inputClass, String tableName, String conditions, String orderBy, String... fieldNames) is easy to read: inputClass is a class implementing the DBWritable interface, tableName is the table to read, conditions is the WHERE condition, orderBy is the sort clause, and fieldNames are the columns to fetch. Together they are essentially a SELECT statement taken apart. Alternatively, an overload accepts a complete SQL statement: setInput(Job job, Class<? extends DBWritable> inputClass, String inputQuery, String inputCountQuery).
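As a sketch, the table-based call in main() could be replaced with this query form; the count query here is an assumption that mirrors the WHERE clause (DBInputFormat uses it to decide how many records to spread across map tasks):

// Equivalent to the table-based setInput() call above, expressed as full SQL.
DBInputFormat.setInput(job, TblsRecord.class,
        "SELECT TBL_NAME, TBL_TYPE FROM linuxidc_tbls "
                + "WHERE TBL_NAME like 'linuxidc%' ORDER BY TBL_NAME",
        "SELECT COUNT(*) FROM linuxidc_tbls WHERE TBL_NAME like 'linuxidc%'");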