Word Count in Storm
2014-11-02 13:59
package com.mengyao.storm;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.io.FileUtils;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

/**
 * Word count in Storm. The topology is InputSpout -> SplitBolt -> CountBolt = WordCountTopology.
 * @author mengyao
 */
@SuppressWarnings("all")
public class WordCountTopology {

    public static class InputSpout extends BaseRichSpout {

        private Map conf;
        private TopologyContext context;
        private SpoutOutputCollector collector;

        /**
         * Called exactly once when the spout is initialized, similar to the
         * setup() method of a Mapper/Reducer in MapReduce.
         */
        @Override
        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.conf = conf;
            this.context = context;
            this.collector = collector;
        }

        /**
         * Called repeatedly by Storm. Each call scans D:/ for *.log files,
         * emits one tuple per line, then renames each processed file to *.tmp
         * so it is not read again.
         */
        @Override
        public void nextTuple() {
            Collection<File> listFiles = FileUtils.listFiles(new File("D:/"), new String[]{"log"}, false);
            for (File file : listFiles) {
                try {
                    List<String> lines = FileUtils.readLines(file);
                    for (String line : lines) {
                        this.collector.emit(new Values(line));
                        System.err.println("==== InputSpout: " + line + " ====");
                    }
                    FileUtils.moveFile(file, new File(file.getAbsoluteFile() + ".tmp"));
                } catch (IOException e) {
                    e.printStackTrace();
                    throw new RuntimeException(e);
                }
            }
            // Avoid busy-spinning when no new files are present.
            Utils.sleep(1000);
        }

        /**
         * Declares the field "line" for the downstream bolt to subscribe to.
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("line"));
        }
    }

    public static class SplitBolt extends BaseRichBolt {

        private Map stormConf;
        private TopologyContext context;
        private OutputCollector collector;

        /**
         * Called exactly once when the bolt is initialized, similar to the
         * setup() method of a Mapper/Reducer in MapReduce.
         */
        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.stormConf = stormConf;
            this.context = context;
            this.collector = collector;
        }

        /**
         * Called once per input tuple; splits the line on tab characters and
         * emits one tuple per word.
         */
        @Override
        public void execute(Tuple input) {
            String line = input.getStringByField("line");
            String[] words = line.split("\t");
            for (String word : words) {
                this.collector.emit(new Values(word));
                System.err.println("==== SplitBolt: " + word + " ====");
            }
        }

        /**
         * Declares the field "word" for the downstream bolt to subscribe to.
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("word"));
        }
    }

    public static class CountBolt extends BaseRichBolt {

        private Map stormConf;
        private TopologyContext context;
        private OutputCollector collector;
        private HashMap<String, Long> map = new HashMap<String, Long>();

        /**
         * Called exactly once when the bolt is initialized, similar to the
         * setup() method of a Mapper/Reducer in MapReduce.
         */
        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.stormConf = stormConf;
            this.context = context;
            this.collector = collector;
        }

        /**
         * Accumulates a running count per word in an in-memory map and prints
         * the current totals.
         */
        @Override
        public void execute(Tuple input) {
            String word = input.getStringByField("word");
            Long value = map.get(word);
            if (value == null) {
                value = 0L;
            }
            value++;
            map.put(word, value);
            for (Entry<String, Long> entry : map.entrySet()) {
                System.err.println("==== CountBolt: " + entry + " ====");
            }
        }

        /**
         * This is the final bolt and emits nothing, so no output fields are declared.
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
        }
    }

    public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
        String topologyName = WordCountTopology.class.getSimpleName();
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("input", new InputSpout());
        builder.setBolt("split", new SplitBolt()).shuffleGrouping("input");
        builder.setBolt("count", new CountBolt()).shuffleGrouping("split");
        Config config = new Config();
        config.setDebug(true);
        if (args != null && args.length > 0) {
            // With command-line arguments, submit the topology to a cluster (production).
            config.setNumWorkers(3);
            StormSubmitter.submitTopology(topologyName, config, builder.createTopology());
        } else {
            // Otherwise, run the topology in local mode.
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology(topologyName, config, builder.createTopology());
            Utils.sleep(1000 * 100);
            cluster.killTopology(topologyName);
            cluster.shutdown();
        }
    }
}

The required jar dependencies are shown below:
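The original post listed the jars as an image, which is not reproduced here. As a rough equivalent, here is a minimal Maven sketch; the version numbers are assumptions for a build against the pre-Apache backtype.storm API (Storm 0.9.x), and commons-io is needed for the FileUtils calls:

    <dependencies>
        <!-- Storm 0.9.x still exposes the backtype.storm packages used above;
             the exact version is an assumption -->
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>0.9.2-incubating</version>
            <!-- use scope "provided" when submitting to a real cluster, where
                 storm-core is already on the classpath; omit it for local mode -->
        </dependency>
        <!-- FileUtils.listFiles/readLines/moveFile come from commons-io -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
        </dependency>
    </dependencies>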
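As a worked example (the file name is hypothetical): if D:/test.log contains the single tab-separated line "hello\tstorm\thello", InputSpout emits that line, SplitBolt splits it into hello, storm, hello, and CountBolt ends up printing the running totals hello=2 and storm=1. Note that SplitBolt splits on the tab character only, so space-separated words would be counted as a single token.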
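To run it: launching main() with no arguments starts local mode, prints the debug output for about 100 seconds, then kills the topology and shuts the LocalCluster down. Passing any argument switches to cluster submission via StormSubmitter, e.g. with a command along these lines (the jar name here is an assumption): storm jar wordcount.jar com.mengyao.storm.WordCountTopology anyArg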