spatialhadoop2.3源码阅读(六) grid 索引生成方法(二)
2015-12-15 10:06
411 查看
上一篇主要介绍了grid 索引生成中MapReduce Job 的一些配置信息,接下来将主要介绍Map,Reduce等MapReduce的具体实现。
1. Map
configure() 函数从SpatialHadoop的配置类中取出网格信息。
map函数主要是遍历网格信息,判断当前输入的数据与哪一个网格相交,然后将网格id和当前输入数据一起输出。
其中cellMbr为当前输入数据所属文件的最小包围矩形。
2. Reduce
3. OutputCommitter
committer的作用是将生成的所有文件名包含_master的文件合并为同一个文件,生成_master.grid文件
最后的输出文件:
在输出目录下,_master.grid为索引文件,具体内容如下
同时在输入文件下也会生成一个索引文件,命名为_master.heap,具体内容介绍见spatialhadoop2.3源码阅读(四) FileMBR类
1. Map
public static class RepartitionMap<T extends Shape> extends MapReduceBase implements Mapper<Rectangle, T, IntWritable, T> { /**List of cells used by the mapper*/ private CellInfo[] cellInfos; /**Used to output intermediate records*/ private IntWritable cellId = new IntWritable(); @Override public void configure(JobConf job) { try { cellInfos = SpatialSite.getCells(job); super.configure(job); } catch (IOException e) { e.printStackTrace(); } } /** * Map function * @param dummy * @param shape * @param output * @param reporter * @throws IOException */ public void map(Rectangle cellMbr, T shape, OutputCollector<IntWritable, T> output, Reporter reporter) throws IOException { Rectangle shape_mbr = shape.getMBR(); if (shape_mbr == null) return; // Only send shape to output if its lowest corner lies in the cellMBR // This ensures that a replicated shape in an already partitioned file // doesn't get send to output from all partitions if (!cellMbr.isValid() || cellMbr.contains(shape_mbr.x1, shape_mbr.y1)) { for (int cellIndex = 0; cellIndex < cellInfos.length; cellIndex++) { if (cellInfos[cellIndex].isIntersected(shape_mbr)) { cellId.set((int) cellInfos[cellIndex].cellId); output.collect(cellId, shape); } } } } }
configure() 函数从SpatialHadoop的配置类中取出网格信息。
map函数主要是遍历网格信息,判断当前输入的数据与哪一个网格相交,然后将网格id和当前输入数据一起输出。
其中cellMbr为当前输入数据所属文件的最小包围矩形。
2. Reduce
public static class RepartitionReduce<T extends Shape> extends MapReduceBase implements Reducer<IntWritable, T, IntWritable, T> { @Override public void reduce(IntWritable cellIndex, Iterator<T> shapes, OutputCollector<IntWritable, T> output, Reporter reporter) throws IOException { T shape = null; while (shapes.hasNext()) { shape = shapes.next(); output.collect(cellIndex, shape); } // Close cell output.collect(new IntWritable(-cellIndex.get()), shape); } }reduce中输出与map基本相同,区别在于当一个cell的所有shape输出完毕之后,会输出一个结束标记。
3. OutputCommitter
public static class RepartitionOutputCommitter extends FileOutputCommitter { @Override public void commitJob(JobContext context) throws IOException { super.commitJob(context); JobConf job = context.getJobConf(); Path outPath = GridOutputFormat.getOutputPath(job); FileSystem outFs = outPath.getFileSystem(job); // Concatenate all master files into one file FileStatus[] resultFiles = outFs.listStatus(outPath, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().contains("_master"); } }); if (resultFiles.length == 0) { LOG.warn("No _master files were written by reducers"); } else { String ext = resultFiles[0].getPath().getName() .substring(resultFiles[0].getPath().getName().lastIndexOf('.')); Path masterPath = new Path(outPath, "_master" + ext); OutputStream destOut = outFs.create(masterPath); byte[] buffer = new byte[4096]; for (FileStatus f : resultFiles) { InputStream in = outFs.open(f.getPath()); int bytes_read; do { bytes_read = in.read(buffer); if (bytes_read > 0) destOut.write(buffer, 0, bytes_read); } while (bytes_read > 0); in.close(); outFs.delete(f.getPath(), false); } destOut.close(); } } }
committer的作用是将生成的所有文件名包含_master的文件合并为同一个文件,生成_master.grid文件
最后的输出文件:
在输出目录下,_master.grid为索引文件,具体内容如下
3,-77.12098637142859,-89.9678417,-25.745067357142858,-47.811636574999994,191,55738,part-00000_data_00003 5,25.63085165714284,-89.9678417,77.00677067142851,-47.811636574999994,1,221,part-00000_data_00005_2 7,128.3826896857143,-89.9678417,179.7586087,-47.811636574999994,1,235,part-00000_data_00007_4 8,-179.8728244,-47.811636574999994,-128.49690538571429,-5.655431449999995,626,157951,part-00000_data_00008_6 9,-128.49690538571429,-47.811636574999994,-77.12098637142859,-5.655431449999995,1600,341406,part-00000_data_00009_8 10,-77.12098637142859,-47.811636574999994,-25.745067357142858,-5.655431449999995,93385,24015785,part-00000_data_00010_10 11,-25.745067357142858,-47.811636574999994,25.63085165714284,-5.655431449999995,3999,1071489,part-00000_data_00011_12 12,25.63085165714284,-47.811636574999994,77.00677067142851,-5.655431449999995,8068,2431070,part-00000_data_00012_14 13,77.00677067142851,-47.811636574999994,128.3826896857143,-5.655431449999995,10301,3542696,part-00000_data_00013_16 14,128.3826896857143,-47.811636574999994,179.7586087,-5.655431449999995,54419,21435043,part-00000_data_00014_18 15,-179.8728244,-5.655431449999995,-128.49690538571429,36.500773675000005,1117,947649,part-00000_data_00015_20 16,-128.49690538571429,-5.655431449999995,-77.12098637142859,36.500773675000005,103692,49054236,part-00000_data_00016_22 17,-77.12098637142859,-5.655431449999995,-25.745067357142858,36.500773675000005,19652,6238733,part-00000_data_00017_24 18,-25.745067357142858,-5.655431449999995,25.63085165714284,36.500773675000005,22265,6842792,part-00000_data_00018_26 19,25.63085165714284,-5.655431449999995,77.00677067142851,36.500773675000005,33397,9795129,part-00000_data_00019_28 20,77.00677067142851,-5.655431449999995,128.3826896857143,36.500773675000005,50484,16074825,part-00000_data_00020_30 21,128.3826896857143,-5.655431449999995,179.7586087,36.500773675000005,25300,9308858,part-00000_data_00021_32 
22,-179.8728244,36.500773675000005,-128.49690538571429,78.6569788,554,216779,part-00000_data_00022_34 23,-128.49690538571429,36.500773675000005,-77.12098637142859,78.6569788,176706,80332822,part-00000_data_00023_36 24,-77.12098637142859,36.500773675000005,-25.745067357142858,78.6569788,79614,46142781,part-00000_data_00024_38 25,-25.745067357142858,36.500773675000005,25.63085165714284,78.6569788,1142227,359092803,part-00000_data_00025_40 26,25.63085165714284,36.500773675000005,77.00677067142851,78.6569788,112315,35125566,part-00000_data_00026_42 27,77.00677067142851,36.500773675000005,128.3826896857143,78.6569788,12004,3930258,part-00000_data_00027_44 28,128.3826896857143,36.500773675000005,179.7586087,78.6569788,8399,2691664,part-00000_data_00028_46每一行分别代表一个输出文件,字段含义为cellid,x1,y1,x2,y2,记录数,文件大小,文件名
同时在输入文件下也会生成一个索引文件,命名为_master.heap,具体内容介绍见spatialhadoop2.3源码阅读(四) FileMBR类
相关文章推荐
- Linux Shell的两种加密方式
- 如何让我们的网站搜索功能飞起来!(系列2)
- linux c 多现线程
- linux openssh升级步骤
- python 启动shell报错Subprocess Startup Error
- 如何做基于粉丝的视频主播网站?
- 重装windows后修复CentOS的Grub
- Linux 课程
- yael在CentOS6.5上的安装
- apache的UseCanonicalName
- hadoop执行jar流程分析
- docker【3】docker镜像容器安装
- 酷炫!趣味十足的Linux命令
- 《game design workshop》阅读笔记(三)——Game Design Basics——Working with Formal Elements
- 使用 docker+tmux 加强容器调度
- 换tomcat得不到结果出现404错误
- powershell
- o2cgrep工具搜索被编译为".o"的C文件
- Linux 配置:Xmanager连接Linux图形界面
- Linux 配置:Xmanager连接Linux图形界面