您的位置:首页 > 编程语言 > Java开发

java html字符串处理

2014-01-02 19:20 429 查看
package com.jarry;

/**
 * Normalizes loosely written HTML start tags so that every attribute value is
 * wrapped in double quotes and stray whitespace around {@code =} is removed.
 *
 * <p>Example: {@code <em   class= class1 class2>} becomes
 * {@code <em class="class1 class2">}.
 *
 * <p>Known limitations of the token-based approach used here:
 * <ul>
 *   <li>Only the space character separates tokens; tabs and line breaks inside
 *       a tag are not treated as separators.</li>
 *   <li>Values that already carry quotes are not detected and get wrapped
 *       again.</li>
 *   <li>A {@code >} inside text or inside an attribute value is treated as the
 *       end of a tag.</li>
 * </ul>
 */
public class RegularizedHtml {

    /** Runs the normalizer on a sample fragment and prints the result. */
    public static void main(String[] args) {
        String initHtml = "<p style = text-align:center;margin:10px; font-size:12px; width=30px height=100px class=class1 class2 ><strong  class =class3 class4 >文本内容 </strong><img src=http://img1.jpg/><em   class= class1 class2></em><img src= http://img2.jpg /></p>";
        System.out.println(regularized(initHtml));
    }

    /**
     * Returns {@code initHtml} with the attributes of every start tag rewritten
     * as {@code name="value"}. End tags and text are copied through unchanged.
     *
     * <p>The input is cut at every {@code >}; each piece that begins with
     * {@code <} but not {@code </} is treated as a start tag and normalized.
     * The pieces are then joined again with {@code >}, so the output contains
     * exactly as many {@code >} characters as the input (after the
     * {@code "/>"} to {@code " />"} expansion, which adds none).
     *
     * @param initHtml the HTML fragment to normalize; must not be {@code null}
     * @return the normalized fragment; an empty input yields an empty result
     * @throws NullPointerException if {@code initHtml} is {@code null}
     */
    public static String regularized(String initHtml) {
        // Detach the self-closing marker from a preceding attribute value so
        // that "/" becomes a token of its own when the tag is split on spaces.
        String html = initHtml.replace("/>", " />");

        // A negative limit keeps trailing empty pieces. Together with writing
        // ">" only BETWEEN pieces this reproduces the input's ">" characters
        // exactly: none is invented after trailing text and none is lost when
        // the input ends in several ">" in a row.
        String[] pieces = html.split(">", -1);

        StringBuilder result = new StringBuilder(html.length() + 16);
        for (int i = 0; i < pieces.length; i++) {
            if (i > 0) {
                result.append('>');
            }
            String piece = pieces[i];
            boolean isStartTag = piece.startsWith("<") && !piece.startsWith("</");
            result.append(isStartTag ? quoteAttributes(piece) : piece);
        }
        return result.toString();
    }

    /**
     * Rewrites one start tag (without its closing {@code >}) so that every
     * attribute value is double-quoted.
     *
     * <p>The tag is split on spaces and the tokens are processed from last to
     * first, each being inserted at the front of the buffer. Two flags track
     * the position inside the attribute currently being assembled:
     * <ul>
     *   <li>both {@code true} (initial state): the next plain token is the
     *       LAST word of a value, so it receives the closing quote and both
     *       flags become {@code false};</li>
     *   <li>both {@code false}: plain tokens are further words of the same
     *       value and are inserted followed by a space;</li>
     *   <li>a token containing {@code =} has that {@code =} turned into
     *       {@code ="} (opening quote) and sets {@code valueComplete}; if the
     *       token also carries the attribute name, {@code attributeComplete}
     *       is set as well;</li>
     *   <li>{@code valueComplete} set but {@code attributeComplete} clear: the
     *       next plain token is the attribute name.</li>
     * </ul>
     * The tag name (token 0) is inserted last.
     *
     * @param startTag text of the form {@code <name attr ...}, optionally
     *                 ending in a separate {@code /} token
     * @return the normalized tag text, trimmed, still without {@code >}
     */
    private static String quoteAttributes(String startTag) {
        String[] tokens = startTag.split(" ");
        StringBuilder tag = new StringBuilder();

        boolean valueComplete = true;
        boolean attributeComplete = true;

        // Stop before index 0: the "<name" token is inserted separately below.
        for (int i = tokens.length - 1; i > 0; i--) {
            String token = tokens[i];

            if ("/".equals(token)) {
                // Self-closing marker: always the last token, so appending is safe.
                tag.append("/");
                continue;
            }
            if (token.isEmpty()) {
                // Produced by consecutive spaces.
                continue;
            }

            if (token.contains("=")) {
                if ("=".equals(token)) {
                    // Bare "=": name and value live in neighbouring tokens.
                    tag.insert(0, "=\"");
                } else if (token.startsWith("=")) {
                    // "=value": the attribute name is in the preceding token.
                    if (valueComplete && attributeComplete) {
                        // Single-word value: open and close the quotes here.
                        tag.insert(0, token.replaceFirst("=", "=\"") + "\" ");
                        attributeComplete = false;
                    } else {
                        tag.insert(0, token.replaceFirst("=", "=\"") + " ");
                    }
                } else if (token.endsWith("=")) {
                    // "name=": the value words were already inserted.
                    tag.insert(0, token.replaceFirst("=", "=\""));
                    attributeComplete = true;
                } else {
                    // "name=value"
                    if (valueComplete && attributeComplete) {
                        tag.insert(0, token.replaceFirst("=", "=\"") + "\" ");
                    } else {
                        tag.insert(0, token.replaceFirst("=", "=\"") + " ");
                    }
                    attributeComplete = true;
                }
                valueComplete = true;
            } else if (valueComplete && attributeComplete) {
                // Last word of a new value: it carries the closing quote.
                tag.insert(0, token + "\" ");
                valueComplete = false;
                attributeComplete = false;
            } else if (valueComplete) {
                // Value already opened by an "=" token: this is the attribute name.
                tag.insert(0, token);
                attributeComplete = true;
            } else {
                // Another word of a multi-word value.
                tag.insert(0, token + " ");
            }
        }

        tag.insert(0, tokens[0] + " ");
        return tag.toString().trim();
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: