DFA算法过滤敏感词,替换为*
2012-08-14 20:11
316 查看
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;

/**
 * Sensitive-word filter built on a byte-level keyword trie.
 *
 * <p>Keywords are encoded with the configured charset and inserted into a trie
 * that branches on unsigned byte values (0-255). Searching walks the encoded
 * text through the trie and overwrites every byte of a matched keyword with
 * {@code '*'}, so a keyword that encodes to N bytes becomes N asterisks.
 *
 * <p>Masking writes one byte (42) per matched byte; the decoded result is only
 * meaningful for charsets in which that single byte decodes to {@code '*'}
 * (true for the default utf-8).
 */
public class test {

    /** Byte written over each byte of a matched keyword (ASCII '*'). */
    private static final byte MASK = 42;

    /** Root node of the keyword trie. */
    private TreeNode rootNode = new TreeNode();

    /** Charset name used to encode keywords and text and to decode the masked result. */
    private String charset = "utf-8";

    /**
     * Adds every keyword of the list to the trie.
     *
     * <p>{@code null} entries are skipped, surrounding whitespace is trimmed and
     * keywords that are empty after trimming add nothing. A {@code null} list is
     * treated as empty.
     *
     * @param keywordList keywords to register
     * @throws UnsupportedEncodingException if the configured charset is not supported
     */
    public void createKeywordTree(List<String> keywordList) throws UnsupportedEncodingException {
        if (keywordList == null) {
            return;
        }
        for (String keyword : keywordList) {
            if (keyword == null) {
                continue;
            }
            byte[] bytes = keyword.trim().getBytes(charset);
            TreeNode current = rootNode;
            for (int i = 0; i < bytes.length; i++) {
                // Mask to 0-255 so bytes >= 0x80 do not become negative indexes.
                int index = bytes[i] & 0xff;
                TreeNode child = current.getSubNode(index);
                if (child == null) {
                    child = new TreeNode();
                    current.setSubNode(index, child);
                }
                current = child;
            }
            // Only a non-empty keyword marks a node; the root is never a keyword end.
            if (bytes.length > 0) {
                current.setKeywordEnd(true);
            }
        }
    }

    /**
     * Masks every registered keyword found in {@code text}.
     *
     * @param text text to filter; {@code null} yields an empty string, matching
     *             the behaviour of {@link #searchKeyword(byte[])} for {@code null}
     * @return the text with each byte of every matched keyword replaced by {@code '*'}
     * @throws UnsupportedEncodingException if the configured charset is not supported
     */
    public String searchKeyword(String text) throws UnsupportedEncodingException {
        if (text == null) {
            return "";
        }
        return searchKeyword(text.getBytes(charset));
    }

    /**
     * Masks every registered keyword found in {@code bytes} and decodes the result
     * with the configured charset.
     *
     * <p>The array is modified in place: matched bytes are overwritten with 42.
     *
     * @param bytes text encoded with the configured charset
     * @return the masked text, or an empty string when {@code bytes} is
     *         {@code null} or empty
     * @throws IllegalStateException if the configured charset is not supported
     */
    public String searchKeyword(byte[] bytes) {
        if (bytes == null || bytes.length == 0) {
            return "";
        }

        TreeNode current = rootNode;
        // Number of bytes before `position` that belong to the walk in progress.
        int rollback = 0;
        int position = 0;
        while (position < bytes.length) {
            current = current.getSubNode(bytes[position] & 0xFF);
            if (current == null) {
                // Dead end: rewind so that, after the increment below, scanning
                // resumes one byte after the point where this walk started.
                position -= rollback;
                rollback = 0;
                current = rootNode;
            } else if (current.isKeywordEnd()) {
                for (int i = 0; i <= rollback; i++) {
                    bytes[position - i] = MASK;
                }
                // Stay on this node so a longer keyword sharing the prefix can
                // still match; 1 makes a later dead end resume right after this match.
                rollback = 1;
            } else {
                rollback++;
            }
            position++;
        }

        try {
            // Decode with the same charset that produced the bytes.
            return new String(bytes, charset);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("Unsupported charset: " + charset, e);
        }
    }

    /**
     * Sets the charset used for encoding and decoding.
     *
     * <p>Keywords already in the trie keep the encoding they were inserted with,
     * so call this before {@link #createKeywordTree(List)}.
     *
     * @param charset charset name, e.g. {@code "utf-8"}
     */
    public void setCharset(String charset) {
        this.charset = charset;
    }
}
import java.util.ArrayList;
import java.util.List;

/**
 * One node of a keyword trie that branches on a single unsigned byte value.
 *
 * <p>Each node owns a fixed table of 256 child slots, all initially empty, and
 * a flag telling whether a keyword ends at this node.
 */
public class TreeNode {

    /** Number of child slots per node: one for every possible byte value. */
    private static final int NODE_LEN = 256;

    /** {@code true} when a keyword terminates here; {@code false} otherwise. */
    private boolean end = false;

    /** Child table indexed by byte value; an empty slot holds {@code null}. */
    private List<TreeNode> subNodes = new ArrayList<TreeNode>(NODE_LEN);

    /** Creates a node whose 256 child slots are all empty. */
    public TreeNode() {
        int slot = 0;
        while (slot < NODE_LEN) {
            subNodes.add(null);
            slot++;
        }
    }

    /**
     * Stores a child node in the given slot.
     *
     * @param index slot number, 0 to 255
     * @param node  child to store
     */
    public void setSubNode(int index, TreeNode node) {
        subNodes.set(index, node);
    }

    /**
     * Returns the child stored in the given slot.
     *
     * @param index slot number, 0 to 255
     * @return the child node, or {@code null} if the slot is empty
     */
    public TreeNode getSubNode(int index) {
        TreeNode child = subNodes.get(index);
        return child;
    }

    /**
     * @return {@code true} if a keyword ends at this node
     */
    public boolean isKeywordEnd() {
        return this.end;
    }

    /**
     * Marks or unmarks this node as the end of a keyword.
     *
     * @param end {@code true} if a keyword ends at this node
     */
    public void setKeywordEnd(boolean end) {
        this.end = end;
    }
}
相关文章推荐
- DFA算法过滤敏感词,替换为*
- DFA算法 及java版本实现敏感词过滤
- Java 敏感词替换-dfa算法,效率高
- Java使用DFA算法实现敏感词过滤
- 基于DFA算法实现过滤敏感词
- Java实现DFA算法对敏感词、广告词过滤功能示例
- Java实现DFA算法对敏感词、广告词过滤功能
- Java使用DFA算法实现过滤多家公司自定义敏感字功能详解
- 简单实现java DFA算法对敏感词过滤
- Java实现DFA算法进行敏感词过滤
- DFA算法实现Java敏感词过滤
- DFA算法实现过滤多家公司自定义敏感字
- DFA 算法实现敏感词过滤(python 实现)
- DFA算法 及java版本实现敏感词过滤
- Java实现DFA算法 实现敏感词过滤
- java实现敏感词过滤 dfa算法实现
- 基于DFA实现的敏感词过滤算法及在JFinal中的应用
- DFA算法实现Java敏感词过滤
- 高效敏感词过滤JAVA实现(DFA算法)
- Java DFA算法实现敏感词过滤