将html文件中的图片导出到某一文件夹或者生成xml文件
2008-11-11 14:55
435 查看
package net.risesoft.riseinfo.integration.parse;
import java.io.File;
import java.io.FileInputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import com.hothouseobjects.tags.Inspector;
import com.hothouseobjects.tags.Tag;
import com.hothouseobjects.tags.TagTiller;
/*
* 从html中将img标签的src属性解析出来,并对解析的图片地址进行处理,
* 主要是为了解决组织部中组工网不在OA上,但信息发布是用fckedit做的,
* 他的图片不能直接和数据一起通过webservices传到组工网上的问题
* $author sking huang $date 2008-11-11
*/
public class ParseHtml {
private String htmlSrc=null;
private Map tagList =new HashMap();
private static org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory
.getLog(ParseHtml.class);
public ParseHtml(String htmlSrc){
this.htmlSrc = htmlSrc;
}
//向标签中列表中增加img标签
private void initTagToList(){
//增加取得<a href=...的html标签
// tagList.put("a", new String[]{"href"});
// 增加取得<img src=...的html标签
tagList.put("img", new String[]{"src"});
}
public void addTagList(String key,String[] value){
tagList.put(key, value);
}
public void remove(String key){
tagList.remove(key);
}
public List parse(){
List imageName=new ArrayList();
log.debug("********开始解析html标签***********");
//增加标签列表
initTagToList();
try{
Reader read = new StringReader(htmlSrc);
TagTiller tagtiller = new TagTiller(read);
tagtiller.runTiller();
Tag thePage = tagtiller.getTilledTags();
Set tagSet = tagList.entrySet();
Iterator iter = tagSet.iterator();
//从标签列表中取出要解析的标签,并将解析完的标签加入标签列表
while(iter.hasNext())
{
Map.Entry entry = (Map.Entry)iter.next();
String key = (String)entry.getKey();
String[] value = (String[]) entry.getValue();
if(key == null || "".equals(key)){
continue;
}
if(value == null || value.length==0){
continue;
}
List theHref = Inspector.collectByType(thePage,key);
int i = theHref.size();
while (i>0) {
for(int ii=0;ii<value.length;ii++){
String filterStr=filterStr(((Tag)theHref.get(i-1)).getAttributeValue(value[ii]));
if(filterStr!=null){
imageName.add(filterStr);
}
}
i -=1;
}
}
log.debug("********html标签解析完毕***********");
}catch(Exception e){
log.error("在解析html的过程中出现问题", e);
}
return imageName;
}
//对字符串进行过滤
private String filterStr(String addr){
if(addr==null) return addr;
StringTokenizer parser =new StringTokenizer(addr,"/"///");
String rtn="";
//取最后一个,因为最后一个为图片的名字
while(parser.hasMoreTokens()) {
rtn=parser.nextToken();
}
return rtn;
}
public static void main(String[] args) {
try {
File file = new File("d://ttt.htm");
int len = (int)file.length();
byte[] b;
b = new byte[len];
FileInputStream fis = new FileInputStream(file);
fis.read(b);
fis.close();
ParseHtml pp=new ParseHtml(new String(b));
List list =pp.parse();
for(int i=0;list.size()>0;i++){
System.out.println(list.get(i));
}
}
catch (Exception ex) {
ex.printStackTrace();
}
}
}
package net.risesoft.riseinfo.integration.parse;
import java.util.ArrayList;
import java.util.List;
/**
 * Base class for exporting the images referenced by an HTML fragment. {@link #export()} parses
 * the HTML into a list of image names and then delegates to the subclass-specific
 * {@link #operate()}.
 */
public abstract class ExportImg {
    /** Folder in which the images are stored. */
    private String imgSrc;

    /** Folder the images are to be moved to; stays {@code null} when no transfer is needed. */
    private String imgDest;

    /** Image names produced by the most recent {@link #export()} or set by the caller. */
    private List imgList;

    /** Parser for the HTML handed to the constructor. */
    private ParseHtml parseHtml;

    /**
     * Creates an exporter with neither a source nor a destination folder.
     *
     * @param srcHtml HTML text to scan for images
     */
    public ExportImg(String srcHtml) {
        this(srcHtml, null);
    }

    /**
     * Creates an exporter without a destination folder.
     *
     * @param srcHtml HTML text to scan for images
     * @param imgSrc  folder in which the images are stored
     */
    public ExportImg(String srcHtml, String imgSrc) {
        this(srcHtml, imgSrc, null);
    }

    /**
     * Creates a fully configured exporter.
     *
     * @param srcHtml HTML text to scan for images
     * @param imgSrc  folder in which the images are stored
     * @param imgDest folder the images are to be moved to
     */
    public ExportImg(String srcHtml, String imgSrc, String imgDest) {
        this.imgList = new ArrayList();
        this.imgSrc = imgSrc;
        this.imgDest = imgDest;
        this.parseHtml = new ParseHtml(srcHtml);
    }

    /**
     * Parses the HTML, stores the resulting image list and runs {@link #operate()}.
     *
     * @return whatever {@link #operate()} returns
     */
    public String export() {
        this.imgList = this.parseHtml.parse();
        String result = operate();
        return result;
    }

    /**
     * Performs the actual export using the current image list.
     *
     * @return an implementation-defined result
     */
    public abstract String operate();

    public String getImgSrc() {
        return this.imgSrc;
    }

    public void setImgSrc(String imgSrc) {
        this.imgSrc = imgSrc;
    }

    public String getImgDest() {
        return this.imgDest;
    }

    public void setImgDest(String imgDest) {
        this.imgDest = imgDest;
    }

    public List getImgList() {
        return this.imgList;
    }

    public void setImgList(List imgList) {
        this.imgList = imgList;
    }
}
package net.risesoft.riseinfo.integration.parse;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Copies the images referenced by FCKeditor-produced HTML from the source folder into the
 * destination folder.
 *
 * @author sking huang
 * @since 2008-11-11
 */
public class ExportImgToFile extends ExportImg {
    private static final org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory
            .getLog(ExportImgToFile.class);

    /**
     * @param srcHtml HTML text to scan for images
     * @param imgSrc  folder in which the images are stored
     * @param imgDest folder the images are copied to
     */
    public ExportImgToFile(String srcHtml, String imgSrc, String imgDest) {
        super(srcHtml, imgSrc, imgDest);
    }

    /**
     * Copies every listed image that exists as a regular file in the source folder to the
     * destination folder. The image list must be populated beforehand and holds file names only.
     * A failure on one image is logged and the remaining images are still processed.
     *
     * @return always {@code null}
     */
    public String operate() {
        List names = super.getImgList();
        for (int i = 0; i < names.size(); i++) {
            String imageName = (String) names.get(i);
            try {
                File file = new File(getImgSrc() + File.separator + imageName);
                // Only copy when the path exists and is a regular file.
                if (file.exists() && file.isFile()) {
                    FileInputStream input = new FileInputStream(file);
                    try {
                        FileOutputStream output = new FileOutputStream(getImgDest()
                                + File.separator + imageName);
                        try {
                            byte[] b = new byte[1024];
                            int size = 0;
                            while ((size = input.read(b)) != -1) {
                                output.write(b, 0, size);
                            }
                        } finally {
                            // Close even when the copy fails so the handle is not leaked.
                            output.close();
                        }
                    } finally {
                        input.close();
                    }
                }
            } catch (IOException e) {
                log.error("文件导出过程中出现问题", e);
            }
        }
        return null;
    }

    /**
     * Demo entry point: copies the images referenced by {@code d://ttt.htm} from
     * {@code D://ttt.files} to {@code D://img//dest}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            File file = new File("d://ttt.htm");
            byte[] b = new byte[(int) file.length()];
            FileInputStream fis = new FileInputStream(file);
            try {
                // A single read() may return fewer bytes than requested; loop until full or EOF.
                int off = 0;
                while (off < b.length) {
                    int n = fis.read(b, off, b.length - off);
                    if (n < 0) {
                        break;
                    }
                    off += n;
                }
            } finally {
                fis.close();
            }
            ExportImg eif = new ExportImgToFile(new String(b), "D://ttt.files",
                    "D://img//dest");
            eif.export();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}
package net.risesoft.riseinfo.integration.parse;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import net.risesoft.integration.adapter.AdapterUtil;
public class ExportImgToXml extends ExportImg {
private static org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory
.getLog(ExportImgToXml.class);
public ExportImgToXml(String srcHtml, String imgSrc) {
super(srcHtml, imgSrc);
}
public String operate() {
StringBuffer sb=new StringBuffer();
sb.append("<?xml version=/"1.0/" encoding=/"GB2312/"?>");
sb.append("<DATA>");
for (int i = 0; i < super.getImgList().size(); i++) {
String imageName = (String) super.getImgList().get(i);
try {
File file = new File(getImgSrc() + File.separator + imageName);
//如果文件存在且是文件
if (file.exists() && file.isFile()) {
sb.append("<IMGLIST>");
sb.append("<IMGNAME>" + file.getName() + "</IMGNAME>");
FileInputStream input = new FileInputStream(file);
byte[] b=new byte[(int)file.length()];
input.read(b);
sb.append("<IMGVALUE>" + AdapterUtil.base64Encode(b) +
"</IMGVALUE>");
sb.append("</IMGLIST>");
input.close();
}
} catch (Exception e) {
log.error("文件生成xml过程中出现问题", e);
}
}
sb.append("</DATA>");
return sb.toString();
}
//此方法为组工网一端接收xml的例子,只做参考用
public void parseXml(InputStream is){
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
factory.setValidating(true);
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(is);
NodeList attnodeList = doc
.getElementsByTagName("IMGLIST");
int attlength = attnodeList.getLength();
for (int attIndex = 0; attIndex < attlength; attIndex++) { // 实际上,Attachment有0到n个
Node attnode = attnodeList.item(attIndex);
NodeList attlist = attnode.getChildNodes();
String fileName=null;
byte[] fileContent=null;
for (int j = 0; j < attlist.getLength(); j++) {
Node col = attlist.item(j);
if (col.getNodeName() == null) {
continue;
}
Node firstChild = col.getFirstChild();
if (firstChild == null) {
continue;
}
String value = firstChild.getNodeValue();
if (value == null && value.length() == 0) {
continue;
}
String field = col.getNodeName();
if (field.equals("IMGVALUE")) {
fileContent=AdapterUtil.base64Decode(value);
} else if (field.equals("IMGNAME")) {
fileName= value;
}
}
if(fileName!=null && fileContent!=null){
File file =new File("D://img//dest//"+fileName);
if(!file.exists())
file.createNewFile();
OutputStream fos =new FileOutputStream(file);
fos.write(fileContent);
fos.close();
}
}
log.info("**************数据写入成功********************");
} catch (Exception ex) {
log.error("附件写入出错了!", ex);
}
}
public static void main(String[] args) {
// 生成图片
try {
File file = new File("d://ttt.htm");
int len = (int) file.length();
byte[] b;
b = new byte[len];
FileInputStream fis = new FileInputStream(file);
fis.read(b);
fis.close();
ExportImgToXml eif = new ExportImgToXml(new String(b), "D://ttt.files");
String img=eif.export();
//System.out.println(img);
InputStream is =new ByteArrayInputStream(img.getBytes());
eif.parseXml(is);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
import java.io.File;
import java.io.FileInputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import com.hothouseobjects.tags.Inspector;
import com.hothouseobjects.tags.Tag;
import com.hothouseobjects.tags.TagTiller;
/*
* 从html中将img标签的src属性解析出来,并对解析的图片地址进行处理,
* 主要是为了解决组织部中组工网不在OA上,但信息发布是用fckedit做的,
* 他的图片不能直接和数据一起通过webservices传到组工网上的问题
* $author sking huang $date 2008-11-11
*/
public class ParseHtml {
private String htmlSrc=null;
private Map tagList =new HashMap();
private static org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory
.getLog(ParseHtml.class);
public ParseHtml(String htmlSrc){
this.htmlSrc = htmlSrc;
}
//向标签中列表中增加img标签
private void initTagToList(){
//增加取得<a href=...的html标签
// tagList.put("a", new String[]{"href"});
// 增加取得<img src=...的html标签
tagList.put("img", new String[]{"src"});
}
public void addTagList(String key,String[] value){
tagList.put(key, value);
}
public void remove(String key){
tagList.remove(key);
}
public List parse(){
List imageName=new ArrayList();
log.debug("********开始解析html标签***********");
//增加标签列表
initTagToList();
try{
Reader read = new StringReader(htmlSrc);
TagTiller tagtiller = new TagTiller(read);
tagtiller.runTiller();
Tag thePage = tagtiller.getTilledTags();
Set tagSet = tagList.entrySet();
Iterator iter = tagSet.iterator();
//从标签列表中取出要解析的标签,并将解析完的标签加入标签列表
while(iter.hasNext())
{
Map.Entry entry = (Map.Entry)iter.next();
String key = (String)entry.getKey();
String[] value = (String[]) entry.getValue();
if(key == null || "".equals(key)){
continue;
}
if(value == null || value.length==0){
continue;
}
List theHref = Inspector.collectByType(thePage,key);
int i = theHref.size();
while (i>0) {
for(int ii=0;ii<value.length;ii++){
String filterStr=filterStr(((Tag)theHref.get(i-1)).getAttributeValue(value[ii]));
if(filterStr!=null){
imageName.add(filterStr);
}
}
i -=1;
}
}
log.debug("********html标签解析完毕***********");
}catch(Exception e){
log.error("在解析html的过程中出现问题", e);
}
return imageName;
}
//对字符串进行过滤
private String filterStr(String addr){
if(addr==null) return addr;
StringTokenizer parser =new StringTokenizer(addr,"/"///");
String rtn="";
//取最后一个,因为最后一个为图片的名字
while(parser.hasMoreTokens()) {
rtn=parser.nextToken();
}
return rtn;
}
public static void main(String[] args) {
try {
File file = new File("d://ttt.htm");
int len = (int)file.length();
byte[] b;
b = new byte[len];
FileInputStream fis = new FileInputStream(file);
fis.read(b);
fis.close();
ParseHtml pp=new ParseHtml(new String(b));
List list =pp.parse();
for(int i=0;list.size()>0;i++){
System.out.println(list.get(i));
}
}
catch (Exception ex) {
ex.printStackTrace();
}
}
}
package net.risesoft.riseinfo.integration.parse;
import java.util.ArrayList;
import java.util.List;
/**
 * Base class for exporting the images referenced by an HTML fragment. {@link #export()} parses
 * the HTML into a list of image names and then delegates to the subclass-specific
 * {@link #operate()}.
 */
public abstract class ExportImg {
    /** Folder in which the images are stored. */
    private String imgSrc;

    /** Folder the images are to be moved to; stays {@code null} when no transfer is needed. */
    private String imgDest;

    /** Image names produced by the most recent {@link #export()} or set by the caller. */
    private List imgList;

    /** Parser for the HTML handed to the constructor. */
    private ParseHtml parseHtml;

    /**
     * Creates an exporter with neither a source nor a destination folder.
     *
     * @param srcHtml HTML text to scan for images
     */
    public ExportImg(String srcHtml) {
        this(srcHtml, null);
    }

    /**
     * Creates an exporter without a destination folder.
     *
     * @param srcHtml HTML text to scan for images
     * @param imgSrc  folder in which the images are stored
     */
    public ExportImg(String srcHtml, String imgSrc) {
        this(srcHtml, imgSrc, null);
    }

    /**
     * Creates a fully configured exporter.
     *
     * @param srcHtml HTML text to scan for images
     * @param imgSrc  folder in which the images are stored
     * @param imgDest folder the images are to be moved to
     */
    public ExportImg(String srcHtml, String imgSrc, String imgDest) {
        this.imgList = new ArrayList();
        this.imgSrc = imgSrc;
        this.imgDest = imgDest;
        this.parseHtml = new ParseHtml(srcHtml);
    }

    /**
     * Parses the HTML, stores the resulting image list and runs {@link #operate()}.
     *
     * @return whatever {@link #operate()} returns
     */
    public String export() {
        this.imgList = this.parseHtml.parse();
        String result = operate();
        return result;
    }

    /**
     * Performs the actual export using the current image list.
     *
     * @return an implementation-defined result
     */
    public abstract String operate();

    public String getImgSrc() {
        return this.imgSrc;
    }

    public void setImgSrc(String imgSrc) {
        this.imgSrc = imgSrc;
    }

    public String getImgDest() {
        return this.imgDest;
    }

    public void setImgDest(String imgDest) {
        this.imgDest = imgDest;
    }

    public List getImgList() {
        return this.imgList;
    }

    public void setImgList(List imgList) {
        this.imgList = imgList;
    }
}
package net.risesoft.riseinfo.integration.parse;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Copies the images referenced by FCKeditor-produced HTML from the source folder into the
 * destination folder.
 *
 * @author sking huang
 * @since 2008-11-11
 */
public class ExportImgToFile extends ExportImg {
    private static final org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory
            .getLog(ExportImgToFile.class);

    /**
     * @param srcHtml HTML text to scan for images
     * @param imgSrc  folder in which the images are stored
     * @param imgDest folder the images are copied to
     */
    public ExportImgToFile(String srcHtml, String imgSrc, String imgDest) {
        super(srcHtml, imgSrc, imgDest);
    }

    /**
     * Copies every listed image that exists as a regular file in the source folder to the
     * destination folder. The image list must be populated beforehand and holds file names only.
     * A failure on one image is logged and the remaining images are still processed.
     *
     * @return always {@code null}
     */
    public String operate() {
        List names = super.getImgList();
        for (int i = 0; i < names.size(); i++) {
            String imageName = (String) names.get(i);
            try {
                File file = new File(getImgSrc() + File.separator + imageName);
                // Only copy when the path exists and is a regular file.
                if (file.exists() && file.isFile()) {
                    FileInputStream input = new FileInputStream(file);
                    try {
                        FileOutputStream output = new FileOutputStream(getImgDest()
                                + File.separator + imageName);
                        try {
                            byte[] b = new byte[1024];
                            int size = 0;
                            while ((size = input.read(b)) != -1) {
                                output.write(b, 0, size);
                            }
                        } finally {
                            // Close even when the copy fails so the handle is not leaked.
                            output.close();
                        }
                    } finally {
                        input.close();
                    }
                }
            } catch (IOException e) {
                log.error("文件导出过程中出现问题", e);
            }
        }
        return null;
    }

    /**
     * Demo entry point: copies the images referenced by {@code d://ttt.htm} from
     * {@code D://ttt.files} to {@code D://img//dest}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            File file = new File("d://ttt.htm");
            byte[] b = new byte[(int) file.length()];
            FileInputStream fis = new FileInputStream(file);
            try {
                // A single read() may return fewer bytes than requested; loop until full or EOF.
                int off = 0;
                while (off < b.length) {
                    int n = fis.read(b, off, b.length - off);
                    if (n < 0) {
                        break;
                    }
                    off += n;
                }
            } finally {
                fis.close();
            }
            ExportImg eif = new ExportImgToFile(new String(b), "D://ttt.files",
                    "D://img//dest");
            eif.export();
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}
package net.risesoft.riseinfo.integration.parse;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import net.risesoft.integration.adapter.AdapterUtil;
public class ExportImgToXml extends ExportImg {
private static org.apache.log4j.Logger log = net.risesoft.commons.log.LogFactory
.getLog(ExportImgToXml.class);
public ExportImgToXml(String srcHtml, String imgSrc) {
super(srcHtml, imgSrc);
}
public String operate() {
StringBuffer sb=new StringBuffer();
sb.append("<?xml version=/"1.0/" encoding=/"GB2312/"?>");
sb.append("<DATA>");
for (int i = 0; i < super.getImgList().size(); i++) {
String imageName = (String) super.getImgList().get(i);
try {
File file = new File(getImgSrc() + File.separator + imageName);
//如果文件存在且是文件
if (file.exists() && file.isFile()) {
sb.append("<IMGLIST>");
sb.append("<IMGNAME>" + file.getName() + "</IMGNAME>");
FileInputStream input = new FileInputStream(file);
byte[] b=new byte[(int)file.length()];
input.read(b);
sb.append("<IMGVALUE>" + AdapterUtil.base64Encode(b) +
"</IMGVALUE>");
sb.append("</IMGLIST>");
input.close();
}
} catch (Exception e) {
log.error("文件生成xml过程中出现问题", e);
}
}
sb.append("</DATA>");
return sb.toString();
}
//此方法为组工网一端接收xml的例子,只做参考用
public void parseXml(InputStream is){
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
factory.setValidating(true);
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(is);
NodeList attnodeList = doc
.getElementsByTagName("IMGLIST");
int attlength = attnodeList.getLength();
for (int attIndex = 0; attIndex < attlength; attIndex++) { // 实际上,Attachment有0到n个
Node attnode = attnodeList.item(attIndex);
NodeList attlist = attnode.getChildNodes();
String fileName=null;
byte[] fileContent=null;
for (int j = 0; j < attlist.getLength(); j++) {
Node col = attlist.item(j);
if (col.getNodeName() == null) {
continue;
}
Node firstChild = col.getFirstChild();
if (firstChild == null) {
continue;
}
String value = firstChild.getNodeValue();
if (value == null && value.length() == 0) {
continue;
}
String field = col.getNodeName();
if (field.equals("IMGVALUE")) {
fileContent=AdapterUtil.base64Decode(value);
} else if (field.equals("IMGNAME")) {
fileName= value;
}
}
if(fileName!=null && fileContent!=null){
File file =new File("D://img//dest//"+fileName);
if(!file.exists())
file.createNewFile();
OutputStream fos =new FileOutputStream(file);
fos.write(fileContent);
fos.close();
}
}
log.info("**************数据写入成功********************");
} catch (Exception ex) {
log.error("附件写入出错了!", ex);
}
}
public static void main(String[] args) {
// 生成图片
try {
File file = new File("d://ttt.htm");
int len = (int) file.length();
byte[] b;
b = new byte[len];
FileInputStream fis = new FileInputStream(file);
fis.read(b);
fis.close();
ExportImgToXml eif = new ExportImgToXml(new String(b), "D://ttt.files");
String img=eif.export();
//System.out.println(img);
InputStream is =new ByteArrayInputStream(img.getBytes());
eif.parseXml(is);
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
相关文章推荐
- 添加一个文件夹及一些文件如何使用git生成patch git补丁 新文件 图片资源文件
- 关于drawable文件夹下面的图片不能动态在R文件下面生成资源ID
- Itext中 根据html生成Word文件,包含图片
- 遍历指定文件夹下所有的xml文件并动态生成HTML页面!
- webview加载html时使用assets文件夹下的css或者js文件
- 生成某一文件夹内文件清单(批量处理)
- 遍历指定文件夹下所有的xml文件并动态生成HTML页面
- 导出不带.svn的文件夹或者是不含.class的文件
- 遍历指定文件夹下所有的xml文件并动态生成HTML页面
- [003]从文件夹中读取所有图片或者文件
- OpenCV_读取文件夹下的图片生成视频文件
- Java使用wkhtmltox实现HTML代码生成PDF文档或者图片
- 遍历工程中的文件夹的文件或者图片
- bat遍历生成文件夹下所有文件生成html连接
- 递归遍历磁盘下的某一文件夹中所有文件,并copy文件生成文件和带文件夹的文件
- 批量将导入的图片生成对应的元件调整为无损位图质量,设置链接项导出,发布并生成配置文件,fljs小命令脚本 - [jsfl]
- 赵祖辉 图片上传类;支持水印-日期文件夹-生成缩略图 ,支持多文件上传,
- 根据html生成Word文件,包含图片
- 绘制bitmap图片保存,生成ico文件或者对象
- android 文字或者图片生成.pdf文件