您的位置:首页 > Web前端 > HTML

将html文件中的图片导出到某一文件夹或者生成xml文件

2008-11-11 14:55 435 查看
package net.risesoft.riseinfo.integration.parse;

import java.io.File;

import java.io.FileInputStream;

import java.io.Reader;

import java.io.StringReader;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.Iterator;

import java.util.List;

import java.util.Map;

import java.util.Set;

import java.util.StringTokenizer;

import com.hothouseobjects.tags.Inspector;

import com.hothouseobjects.tags.Tag;

import com.hothouseobjects.tags.TagTiller;

/*

* 从html中将img标签的src属性解析出来,并对解析的图片地址进行处理,

* 主要是为了解决组织部中组工网不在OA上,但信息发布是用fckedit做的,

* 他的图片不能直接和数据一起通过webservices传到组工网上的问题

* $author sking huang $date 2008-11-11

*/

public class ParseHtml {

private String htmlSrc=null;

private Map tagList =new HashMap();

private static org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory

.getLog(ParseHtml.class);

public ParseHtml(String htmlSrc){

this.htmlSrc = htmlSrc;

}

//向标签中列表中增加img标签

private void initTagToList(){

//增加取得<a href=...的html标签

// tagList.put("a", new String[]{"href"});

// 增加取得<img src=...的html标签

tagList.put("img", new String[]{"src"});

}

public void addTagList(String key,String[] value){

tagList.put(key, value);

}

public void remove(String key){

tagList.remove(key);

}

public List parse(){

List imageName=new ArrayList();

log.debug("********开始解析html标签***********");

//增加标签列表

initTagToList();

try{

Reader read = new StringReader(htmlSrc);

TagTiller tagtiller = new TagTiller(read);

tagtiller.runTiller();

Tag thePage = tagtiller.getTilledTags();

Set tagSet = tagList.entrySet();

Iterator iter = tagSet.iterator();

//从标签列表中取出要解析的标签,并将解析完的标签加入标签列表

while(iter.hasNext())

{

Map.Entry entry = (Map.Entry)iter.next();

String key = (String)entry.getKey();

String[] value = (String[]) entry.getValue();

if(key == null || "".equals(key)){

continue;

}

if(value == null || value.length==0){

continue;

}

List theHref = Inspector.collectByType(thePage,key);

int i = theHref.size();

while (i>0) {

for(int ii=0;ii<value.length;ii++){

String filterStr=filterStr(((Tag)theHref.get(i-1)).getAttributeValue(value[ii]));

if(filterStr!=null){

imageName.add(filterStr);

}

}

i -=1;

}

}

log.debug("********html标签解析完毕***********");

}catch(Exception e){

log.error("在解析html的过程中出现问题", e);

}

return imageName;

}

//对字符串进行过滤

private String filterStr(String addr){

if(addr==null) return addr;

StringTokenizer parser =new StringTokenizer(addr,"/"///");

String rtn="";

//取最后一个,因为最后一个为图片的名字

while(parser.hasMoreTokens()) {

rtn=parser.nextToken();

}

return rtn;

}

public static void main(String[] args) {

try {

File file = new File("d://ttt.htm");

int len = (int)file.length();

byte[] b;

b = new byte[len];

FileInputStream fis = new FileInputStream(file);

fis.read(b);

fis.close();

ParseHtml pp=new ParseHtml(new String(b));

List list =pp.parse();

for(int i=0;list.size()>0;i++){

System.out.println(list.get(i));

}

}

catch (Exception ex) {

ex.printStackTrace();

}

}

}

package net.risesoft.riseinfo.integration.parse;

import java.util.ArrayList;

import java.util.List;

/**
 * Base class for exporting the images referenced by an HTML string.
 *
 * <p>{@link #export()} first collects the image names through a
 * <code>ParseHtml</code> instance and then delegates to {@link #operate()},
 * which subclasses implement to do the actual export.
 */
public abstract class ExportImg {

    /** Folder in which the images are stored. */
    private String imgSrc = null;

    /**
     * Folder the images are to be transferred to. When no transfer is needed
     * the constructors without this argument can be used.
     */
    private String imgDest = null;

    /** Parser bound to the HTML handed to the constructor. */
    private ParseHtml parseHtml = null;

    /** Image names; replaced by the parse result on every {@link #export()}. */
    private List imgList = new ArrayList();

    /**
     * @param srcHtml HTML text to scan for images
     */
    public ExportImg(String srcHtml) {
        this(srcHtml, null, null);
    }

    /**
     * @param srcHtml HTML text to scan for images
     * @param imgSrc  folder in which the images are stored
     */
    public ExportImg(String srcHtml, String imgSrc) {
        this(srcHtml, imgSrc, null);
    }

    /**
     * @param srcHtml HTML text to scan for images
     * @param imgSrc  folder in which the images are stored
     * @param imgDest folder the images are to be transferred to
     */
    public ExportImg(String srcHtml, String imgSrc, String imgDest) {
        this.imgSrc = imgSrc;
        this.imgDest = imgDest;
        this.parseHtml = new ParseHtml(srcHtml);
    }

    /**
     * Parses the HTML, stores the resulting names as the image list and runs
     * {@link #operate()}.
     *
     * @return whatever {@link #operate()} returns
     */
    public String export() {
        this.imgList = this.parseHtml.parse();
        return operate();
    }

    /**
     * Performs the export for the current image list.
     *
     * @return implementation-defined result
     */
    public abstract String operate();

    /** @return the current image list */
    public List getImgList() {
        return this.imgList;
    }

    /** @param imgList the image list to use */
    public void setImgList(List imgList) {
        this.imgList = imgList;
    }

    /** @return folder in which the images are stored */
    public String getImgSrc() {
        return this.imgSrc;
    }

    /** @param imgSrc folder in which the images are stored */
    public void setImgSrc(String imgSrc) {
        this.imgSrc = imgSrc;
    }

    /** @return folder the images are to be transferred to */
    public String getImgDest() {
        return this.imgDest;
    }

    /** @param imgDest folder the images are to be transferred to */
    public void setImgDest(String imgDest) {
        this.imgDest = imgDest;
    }
}

package net.risesoft.riseinfo.integration.parse;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.IOException;

import java.util.ArrayList;

import java.util.List;

/**
 * Takes the images referenced from fckedit content and copies them into a
 * given directory.
 *
 * @author sking huang
 * @since 2008-11-11
 */
public class ExportImgToFile extends ExportImg {

    private static org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory
            .getLog(ExportImgToFile.class);

    /**
     * @param srcHtml HTML text to scan for images
     * @param imgSrc  folder in which the images are stored
     * @param imgDest folder the images are copied to
     */
    public ExportImgToFile(String srcHtml, String imgSrc, String imgDest) {
        super(srcHtml, imgSrc, imgDest);
    }

    /**
     * Copies every listed image from the source folder to the destination folder.
     *
     * <p>The image list must be populated first (as {@link #export()} does);
     * it holds bare image names only. Names that do not resolve to an existing
     * regular file in the source folder are skipped. An I/O failure on one
     * image is logged and the remaining images are still processed.
     *
     * @return always <code>null</code>
     */
    public String operate() {
        for (int i = 0; i < super.getImgList().size(); i++) {
            String imageName = (String) super.getImgList().get(i);
            File source = new File(getImgSrc() + File.separator + imageName);
            if (!source.exists() || !source.isFile()) {
                continue;
            }
            try {
                copy(source, new File(getImgDest() + File.separator + imageName));
            } catch (IOException e) {
                log.error("文件导出过程中出现问题", e);
            }
        }
        return null;
    }

    /**
     * Copies one file, closing both streams even when reading or writing fails.
     *
     * @param source file to read
     * @param target file to write (created or overwritten)
     * @throws IOException if opening, reading, writing or closing fails
     */
    private static void copy(File source, File target) throws IOException {
        FileInputStream input = new FileInputStream(source);
        try {
            FileOutputStream output = new FileOutputStream(target);
            try {
                byte[] buffer = new byte[1024];
                int size;
                while ((size = input.read(buffer)) != -1) {
                    output.write(buffer, 0, size);
                }
            } finally {
                output.close();
            }
        } finally {
            input.close();
        }
    }

    /**
     * Reads a whole file into memory, looping until every byte has arrived
     * (a single <code>read</code> call may return fewer bytes than requested).
     *
     * @param file file to read
     * @return the file content
     * @throws IOException if the file cannot be read or ends early
     */
    private static byte[] readFile(File file) throws IOException {
        byte[] content = new byte[(int) file.length()];
        FileInputStream in = new FileInputStream(file);
        try {
            int off = 0;
            while (off < content.length) {
                int n = in.read(content, off, content.length - off);
                if (n < 0) {
                    throw new IOException("Unexpected end of file: " + file);
                }
                off += n;
            }
        } finally {
            in.close();
        }
        return content;
    }

    /**
     * Demo entry point: exports the images of a fixed local HTML file.
     */
    public static void main(String[] args) {
        try {
            File file = new File("d://ttt.htm");
            ExportImg eif = new ExportImgToFile(new String(readFile(file)),
                    "D://ttt.files", "D://img//dest");
            eif.export();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}

package net.risesoft.riseinfo.integration.parse;

import java.io.ByteArrayInputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.InputStream;

import java.io.OutputStream;

import javax.xml.parsers.DocumentBuilder;

import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;

import org.w3c.dom.Node;

import org.w3c.dom.NodeList;

import net.risesoft.integration.adapter.AdapterUtil;

public class ExportImgToXml extends ExportImg {

private static org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory

.getLog(ExportImgToXml.class);

public ExportImgToXml(String srcHtml, String imgSrc) {

super(srcHtml, imgSrc);

}

public String operate() {

StringBuffer sb=new StringBuffer();

sb.append("<?xml version=/"1.0/" encoding=/"GB2312/"?>");

sb.append("<DATA>");

for (int i = 0; i < super.getImgList().size(); i++) {

String imageName = (String) super.getImgList().get(i);

try {

File file = new File(getImgSrc() + File.separator + imageName);

//如果文件存在且是文件

if (file.exists() && file.isFile()) {

sb.append("<IMGLIST>");

sb.append("<IMGNAME>" + file.getName() + "</IMGNAME>");

FileInputStream input = new FileInputStream(file);

byte[] b=new byte[(int)file.length()];

input.read(b);

sb.append("<IMGVALUE>" + AdapterUtil.base64Encode(b) +

"</IMGVALUE>");

sb.append("</IMGLIST>");

input.close();

}

} catch (Exception e) {

log.error("文件生成xml过程中出现问题", e);

}

}

sb.append("</DATA>");

return sb.toString();

}

//此方法为组工网一端接收xml的例子,只做参考用

public void parseXml(InputStream is){

try {

DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

factory.setNamespaceAware(true);

factory.setValidating(true);

DocumentBuilder builder = factory.newDocumentBuilder();

Document doc = builder.parse(is);

NodeList attnodeList = doc

.getElementsByTagName("IMGLIST");

int attlength = attnodeList.getLength();

for (int attIndex = 0; attIndex < attlength; attIndex++) { // 实际上,Attachment有0到n个

Node attnode = attnodeList.item(attIndex);

NodeList attlist = attnode.getChildNodes();

String fileName=null;

byte[] fileContent=null;

for (int j = 0; j < attlist.getLength(); j++) {

Node col = attlist.item(j);

if (col.getNodeName() == null) {

continue;

}

Node firstChild = col.getFirstChild();

if (firstChild == null) {

continue;

}

String value = firstChild.getNodeValue();

if (value == null && value.length() == 0) {

continue;

}

String field = col.getNodeName();

if (field.equals("IMGVALUE")) {

fileContent=AdapterUtil.base64Decode(value);

} else if (field.equals("IMGNAME")) {

fileName= value;

}

}

if(fileName!=null && fileContent!=null){

File file =new File("D://img//dest//"+fileName);

if(!file.exists())

file.createNewFile();

OutputStream fos =new FileOutputStream(file);

fos.write(fileContent);

fos.close();

}

}

log.info("**************数据写入成功********************");

} catch (Exception ex) {

log.error("附件写入出错了!", ex);

}

}

public static void main(String[] args) {

// 生成图片

try {

File file = new File("d://ttt.htm");

int len = (int) file.length();

byte[] b;

b = new byte[len];

FileInputStream fis = new FileInputStream(file);

fis.read(b);

fis.close();

ExportImgToXml eif = new ExportImgToXml(new String(b), "D://ttt.files");

String img=eif.export();

//System.out.println(img);

InputStream is =new ByteArrayInputStream(img.getBytes());

eif.parseXml(is);

} catch (Exception ex) {

ex.printStackTrace();

}

}

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: