MapReduce构建HBase二级索引
2015-10-01 17:08
423 查看
import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat; import org.apache.hadoop.hbase.mapreduce.TableInputFormat; import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; import org.apache.hadoop.hbase.mapreduce.TableMapper; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.GenericOptionsParser; public class IndexBuilder { private class MyMapper extends TableMapper<ImmutableBytesWritable, Put> { private Map<byte[], ImmutableBytesWritable> indexes = new HashMap<byte[], ImmutableBytesWritable>(); private String columnFamily; @Override protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException { Set<byte[]> keys = indexes.keySet(); for (byte[] k : keys) { ImmutableBytesWritable indexTableName = indexes.get(k); byte[] val = value.getValue(Bytes.toBytes(columnFamily), k); Put put = new Put(val);// 索引表的rowkey为原始表的值 put.add(Bytes.toBytes("f1"), Bytes.toBytes("id"), key.get());// 索引表的内容为原始表的rowkey context.write(indexTableName, put); } } @Override protected void setup(Context context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); String tableName = conf.get("tableName"); columnFamily = conf.get("columnFamily"); String[] qualifiers = conf.getStrings("qualifiers"); // indexes的key为列名,value为索引表名 for (String q : qualifiers) { indexes.put( Bytes.toBytes(q), new ImmutableBytesWritable(Bytes.toBytes(tableName + "-" + q))); } } } public static void main(String[] args) throws IOException, ClassNotFoundException, 
InterruptedException { Configuration conf = HBaseConfiguration.create(); String[] otherargs = new GenericOptionsParser(conf, args) .getRemainingArgs();// 去除掉没有用的命令行参数 // 输入参数:表名,列族名,列名 if (otherargs.length < 3) { System.exit(-1); } String tableName = otherargs[0]; String columnFamily = otherargs[1]; conf.set("tableName", tableName); conf.set("columnFamily", columnFamily); String[] qualifiers = new String[otherargs.length - 2]; for (int i = 0; i < qualifiers.length; i++) { qualifiers[i] = otherargs[i + 2]; } conf.setStrings("qualifiers", qualifiers); Job job = new Job(conf, tableName); job.setJarByClass(IndexBuilder.class); job.setMapperClass(MyMapper.class); job.setNumReduceTasks(0); job.setInputFormatClass(TableInputFormat.class); // 可以输出多张表 job.setOutputFormatClass(MultiTableOutputFormat.class); Scan scan = new Scan(); scan.setCaching(1000); TableMapReduceUtil.initTableMapperJob(tableName, scan, MyMapper.class, ImmutableBytesWritable.class, Put.class, job); job.waitForCompletion(true); } }
相关文章推荐
- c++ 单例模式
- Eclipse 常用快捷键
- C语言及程序设计.第二十三课.项目2.乱玩数字
- javaweb多窗口展示网页
- QT程序制作deb包并安装在应用程序菜单
- JPA && Spring Data && Spring Data JPA
- 工厂模式---java代码实现
- 八大排序算法(Python实现)
- matlab图像显示程序模板
- C++命名空间
- Matlab - 求方差-均值-均方差-协方差的函数
- Qt全局热键(windows篇)(使用RegisterHotKey和句柄进行注册)
- zw版【转发·台湾nvp系列Delphi例程】HALCON InpaintingCt2
- Java小技巧
- java中抽象类与接口的不同之处
- TP:C3BCA2F7
- Qt中 QString 和int, char等的“相互”转换,关键是QString.toLocal8Bit().data();
- zw版【转发·台湾nvp系列Delphi例程】HALCON InpaintingCt1
- 如何创建虚拟硬盘 + os 读取硬盘参数代码
- Hugo探究