操作html字符串
2016-03-16 13:28
453 查看
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small regex-based helpers for manipulating HTML held in a {@code String}:
 * rewriting an attribute of a given tag, stripping markup, and counting
 * {@code <img>} tags.
 *
 * <p>These helpers use regular expressions, not an HTML parser, so they only
 * understand simple, well-formed opening tags whose attribute values contain
 * no {@code >} character.
 */
public class Test {

    /**
     * Matches an opening {@code <img ...>} tag that has at least one
     * whitespace character after the tag name; group 1 is the attribute text.
     * Compiled once because the expression is constant.
     */
    private static final Pattern IMG_TAG_PATTERN =
            Pattern.compile("<\\s*img\\s+([^>]*)\\s*>", Pattern.CASE_INSENSITIVE);

    /**
     * Demo entry point: prints the number of {@code <img>} tags found in a
     * sample HTML fragment.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String htmlStr = "<p><span><img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p><p><span>"
                + "<img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p>"
                + "<p>hahahahha<span><img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p> ";
        System.out.println(imgSum(htmlStr));
    }

    /**
     * Rewrites one attribute of every matching opening tag in an HTML string.
     *
     * <p>For each {@code <searchTag ...>} the first occurrence of
     * {@code searchAttrib="value"} (single or double quoted) is replaced by
     * {@code searchAttrib="} followed by {@code newStr}. The closing quote is
     * NOT added by this method: {@code newStr} must end with {@code "} if a
     * well-formed attribute is wanted, e.g.
     * {@code updateHtmlTag(html, "img", "src", "http://host/x.jpg\"")}.
     * Tags that do not carry the attribute are re-emitted with their
     * attributes unchanged. Text outside matching tags is copied verbatim.
     *
     * <p>Tag and attribute names are matched case-insensitively; rewritten
     * tags are emitted with the spelling given in {@code searchTag} and
     * {@code searchAttrib}. Both names are inserted into regular expressions
     * as-is, so they must not contain regex metacharacters.
     *
     * <p>The attribute text of every matched tag and each replaced attribute
     * value are printed to standard output as diagnostics.
     *
     * @param htmlStr      HTML text to process
     * @param searchTag    name of the tag to modify, e.g. {@code "img"}
     * @param searchAttrib name of the attribute inside that tag, e.g. {@code "src"}
     * @param newStr       text written after {@code searchAttrib="}; taken
     *                     literally ({@code $} and {@code \} are not special)
     * @return the HTML with the attribute rewritten in every matching tag
     */
    public static String updateHtmlTag(String htmlStr, String searchTag,
            String searchAttrib, String newStr) {
        Pattern patternForTag = Pattern.compile(
                "<\\s*" + searchTag + "\\s+([^>]*)\\s*>", Pattern.CASE_INSENSITIVE);
        // The look-behind keeps "src" from matching inside "data-src" or "xlink:src".
        // Group 1 is the opening quote, group 2 the value; the back-reference forces
        // the closing quote to be the same character as the opening one, so values
        // may contain '|' or the other kind of quote.
        Pattern patternForAttrib = Pattern.compile(
                "(?<![\\w:-])" + searchAttrib + "\\s*=\\s*([\"'])(.+?)\\1",
                Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

        Matcher matcherForTag = patternForTag.matcher(htmlStr);
        StringBuffer sb = new StringBuffer();
        while (matcherForTag.find()) {
            String attributes = matcherForTag.group(1);
            System.out.println(attributes);

            StringBuffer rebuiltTag = new StringBuffer("<" + searchTag + " ");
            Matcher matcherForAttrib = patternForAttrib.matcher(attributes);
            if (matcherForAttrib.find()) {
                System.out.println(matcherForAttrib.group(2));
                // quoteReplacement: newStr is data, not a replacement template.
                matcherForAttrib.appendReplacement(rebuiltTag,
                        Matcher.quoteReplacement(searchAttrib + "=\"" + newStr));
            }
            matcherForAttrib.appendTail(rebuiltTag);
            rebuiltTag.append('>');

            // The rebuilt tag may contain '$' or '\' from the original markup or
            // from newStr; quote it so appendReplacement copies it verbatim.
            matcherForTag.appendReplacement(sb,
                    Matcher.quoteReplacement(rebuiltTag.toString()));
        }
        matcherForTag.appendTail(sb);
        return sb.toString();
    }

    /**
     * Removes HTML formatting from a string.
     *
     * <p>Three passes are applied in order: named character entities such as
     * {@code &nbsp;} (an ampersand, 1-10 ASCII letters, a semicolon) are
     * deleted; everything of the form {@code <...>} is deleted; finally every
     * remaining {@code (}, {@code )}, {@code /}, {@code <} and {@code >}
     * character is deleted.
     *
     * <p>NOTE(review): the last pass also strips parentheses and slashes from
     * ordinary text (e.g. {@code "a/b (c)"} becomes {@code "ab c"}). It is kept
     * for compatibility with existing callers; confirm whether it is intended.
     *
     * @param input text that may contain HTML markup; may be {@code null}
     * @return the text without markup, or {@code ""} when {@code input} is
     *         {@code null} or contains only whitespace
     */
    public static String splitAndFilterString(String input) {
        if (input == null || input.trim().equals("")) {
            return "";
        }
        String str = input.replaceAll("\\&[a-zA-Z]{1,10};", "").replaceAll(
                "<[^>]*>", "").replaceAll("[(/>)<]", "");
        return str;
    }

    /**
     * Counts the {@code <img ...>} tags in an HTML string.
     *
     * <p>Only tags with at least one whitespace character after the tag name
     * are counted (so a bare {@code <img>} is not). The tag name is matched
     * case-insensitively. The attribute text of each counted tag is printed to
     * standard output as a diagnostic.
     *
     * @param htmlStr HTML text to scan; may be {@code null}
     * @return the number of matching tags, {@code 0} for {@code null} input
     */
    public static int imgSum(String htmlStr) {
        if (htmlStr == null) {
            return 0;
        }
        int count = 0;
        Matcher matcherForTag = IMG_TAG_PATTERN.matcher(htmlStr);
        while (matcherForTag.find()) {
            System.out.println(matcherForTag.group(1));
            count++;
        }
        return count;
    }
}
import java.util.regex.Pattern;
/**
 * Small regex-based helpers for manipulating HTML held in a {@code String}:
 * rewriting an attribute of a given tag, stripping markup, and counting
 * {@code <img>} tags.
 *
 * <p>These helpers use regular expressions, not an HTML parser, so they only
 * understand simple, well-formed opening tags whose attribute values contain
 * no {@code >} character.
 */
public class Test {

    /**
     * Matches an opening {@code <img ...>} tag that has at least one
     * whitespace character after the tag name; group 1 is the attribute text.
     * Compiled once because the expression is constant.
     */
    private static final Pattern IMG_TAG_PATTERN =
            Pattern.compile("<\\s*img\\s+([^>]*)\\s*>", Pattern.CASE_INSENSITIVE);

    /**
     * Demo entry point: prints the number of {@code <img>} tags found in a
     * sample HTML fragment.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String htmlStr = "<p><span><img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p><p><span>"
                + "<img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p>"
                + "<p>hahahahha<span><img src=\'http://imagesv5.vivame.cn/pic/upload/phote/201511/07516786-7d60-4ca5-ad11-b6c8bb82d99d.jpg\' border=0></span></p> ";
        System.out.println(imgSum(htmlStr));
    }

    /**
     * Rewrites one attribute of every matching opening tag in an HTML string.
     *
     * <p>For each {@code <searchTag ...>} the first occurrence of
     * {@code searchAttrib="value"} (single or double quoted) is replaced by
     * {@code searchAttrib="} followed by {@code newStr}. The closing quote is
     * NOT added by this method: {@code newStr} must end with {@code "} if a
     * well-formed attribute is wanted, e.g.
     * {@code updateHtmlTag(html, "img", "src", "http://host/x.jpg\"")}.
     * Tags that do not carry the attribute are re-emitted with their
     * attributes unchanged. Text outside matching tags is copied verbatim.
     *
     * <p>Tag and attribute names are matched case-insensitively; rewritten
     * tags are emitted with the spelling given in {@code searchTag} and
     * {@code searchAttrib}. Both names are inserted into regular expressions
     * as-is, so they must not contain regex metacharacters.
     *
     * <p>The attribute text of every matched tag and each replaced attribute
     * value are printed to standard output as diagnostics.
     *
     * @param htmlStr      HTML text to process
     * @param searchTag    name of the tag to modify, e.g. {@code "img"}
     * @param searchAttrib name of the attribute inside that tag, e.g. {@code "src"}
     * @param newStr       text written after {@code searchAttrib="}; taken
     *                     literally ({@code $} and {@code \} are not special)
     * @return the HTML with the attribute rewritten in every matching tag
     */
    public static String updateHtmlTag(String htmlStr, String searchTag,
            String searchAttrib, String newStr) {
        Pattern patternForTag = Pattern.compile(
                "<\\s*" + searchTag + "\\s+([^>]*)\\s*>", Pattern.CASE_INSENSITIVE);
        // The look-behind keeps "src" from matching inside "data-src" or "xlink:src".
        // Group 1 is the opening quote, group 2 the value; the back-reference forces
        // the closing quote to be the same character as the opening one, so values
        // may contain '|' or the other kind of quote.
        Pattern patternForAttrib = Pattern.compile(
                "(?<![\\w:-])" + searchAttrib + "\\s*=\\s*([\"'])(.+?)\\1",
                Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

        Matcher matcherForTag = patternForTag.matcher(htmlStr);
        StringBuffer sb = new StringBuffer();
        while (matcherForTag.find()) {
            String attributes = matcherForTag.group(1);
            System.out.println(attributes);

            StringBuffer rebuiltTag = new StringBuffer("<" + searchTag + " ");
            Matcher matcherForAttrib = patternForAttrib.matcher(attributes);
            if (matcherForAttrib.find()) {
                System.out.println(matcherForAttrib.group(2));
                // quoteReplacement: newStr is data, not a replacement template.
                matcherForAttrib.appendReplacement(rebuiltTag,
                        Matcher.quoteReplacement(searchAttrib + "=\"" + newStr));
            }
            matcherForAttrib.appendTail(rebuiltTag);
            rebuiltTag.append('>');

            // The rebuilt tag may contain '$' or '\' from the original markup or
            // from newStr; quote it so appendReplacement copies it verbatim.
            matcherForTag.appendReplacement(sb,
                    Matcher.quoteReplacement(rebuiltTag.toString()));
        }
        matcherForTag.appendTail(sb);
        return sb.toString();
    }

    /**
     * Removes HTML formatting from a string.
     *
     * <p>Three passes are applied in order: named character entities such as
     * {@code &nbsp;} (an ampersand, 1-10 ASCII letters, a semicolon) are
     * deleted; everything of the form {@code <...>} is deleted; finally every
     * remaining {@code (}, {@code )}, {@code /}, {@code <} and {@code >}
     * character is deleted.
     *
     * <p>NOTE(review): the last pass also strips parentheses and slashes from
     * ordinary text (e.g. {@code "a/b (c)"} becomes {@code "ab c"}). It is kept
     * for compatibility with existing callers; confirm whether it is intended.
     *
     * @param input text that may contain HTML markup; may be {@code null}
     * @return the text without markup, or {@code ""} when {@code input} is
     *         {@code null} or contains only whitespace
     */
    public static String splitAndFilterString(String input) {
        if (input == null || input.trim().equals("")) {
            return "";
        }
        String str = input.replaceAll("\\&[a-zA-Z]{1,10};", "").replaceAll(
                "<[^>]*>", "").replaceAll("[(/>)<]", "");
        return str;
    }

    /**
     * Counts the {@code <img ...>} tags in an HTML string.
     *
     * <p>Only tags with at least one whitespace character after the tag name
     * are counted (so a bare {@code <img>} is not). The tag name is matched
     * case-insensitively. The attribute text of each counted tag is printed to
     * standard output as a diagnostic.
     *
     * @param htmlStr HTML text to scan; may be {@code null}
     * @return the number of matching tags, {@code 0} for {@code null} input
     */
    public static int imgSum(String htmlStr) {
        if (htmlStr == null) {
            return 0;
        }
        int count = 0;
        Matcher matcherForTag = IMG_TAG_PATTERN.matcher(htmlStr);
        while (matcherForTag.find()) {
            System.out.println(matcherForTag.group(1));
            count++;
        }
        return count;
    }
}
相关文章推荐
- HTML <script>标签的type类型
- gulp之文件合并以及整合html中的script和link
- 点击按钮,显示多条子菜单(html)
- HTML代码编码规范
- HTML语义化(2016/3/16更新)
- Angular处理Html转移问题
- Html基本操作实例代码
- html
- 如何在线预览github上的html页面?
- Html position(static | absolute | fixed | relative)定位
- 认识html标签
- 关于a标签伪类中的visited不起作用问题
- html上传图片之前在网页预览实现
- html标签元素分类
- freemarker生成HTML页面时,遇到乱码的解决办法
- doc-remote-debugging.html
- html实现radio按钮选中后显示输入框和提示
- html嵌套MP4、PDF的简单方案
- HTML
- HTML固定的底栏(flex布局)