应用POI,word2007转html
2015-03-04 16:49
309 查看
poi 3.9
http://poi.apache.org/
Java代码
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
//import org.junit.Assert;
//import org.junit.Test;
/**
 * Example program that converts a Word 2007+ ({@code .docx}) document to XHTML
 * using Apache POI's {@link XWPFDocument} together with the XDocReport
 * {@link XHTMLConverter}.
 */
public class word07toHtml {

    /**
     * Converts {@code d:/test/test.docx} to {@code d:/test/test.htm}.
     *
     * <p>Images are handled through a {@link FileImageExtractor} and a
     * {@link FileURIResolver} that both use {@code d:/test/media} as their base
     * folder. If the source file is missing, or its name does not end with
     * {@code .docx} (compared case-insensitively), a message is printed and
     * nothing is converted.
     *
     * @throws IOException if the document cannot be read or the HTML file
     *                     cannot be written
     */
    //@Test
    public static void canExtractImage() throws IOException {
        File f = new File("d:/test/test.docx");
        if (!f.exists()) {
            System.out.println("Sorry File does not Exists!");
            return;
        }

        // Case-insensitive suffix check so that e.g. "Test.Docx" is accepted too.
        // regionMatches returns false (instead of throwing) when the name is
        // shorter than the suffix and the offset becomes negative.
        String name = f.getName();
        String suffix = ".docx";
        boolean isDocx = name.regionMatches(true, name.length() - suffix.length(),
                suffix, 0, suffix.length());
        if (!isDocx) {
            System.out.println("Enter only MS Office 2007+ files");
            return;
        }

        // 1) Load DOCX into XWPFDocument
        InputStream in = new FileInputStream(f);
        try {
            XWPFDocument document = new XWPFDocument(in);

            // 2) Prepare XHTML options: the same folder is used to extract the
            //    images and to resolve their URIs in the generated markup.
            File imageFolderFile = new File("d:/test/media");
            XHTMLOptions options = XHTMLOptions.create().URIResolver(
                    new FileURIResolver(imageFolderFile));
            options.setExtractor(new FileImageExtractor(imageFolderFile));
            // Optional settings, left disabled in this example:
            //options.setIgnoreStylesIfUnused(false);
            //options.setFragment(true);

            // 3) Convert XWPFDocument to XHTML
            OutputStream out = new FileOutputStream(new File("d:/test/test.htm"));
            try {
                XHTMLConverter.getInstance().convert(document, out, options);
            } finally {
                // Always close the output so buffered data is flushed and the
                // file handle is released, even if the conversion fails.
                out.close();
            }
        } finally {
            // Release the handle on the source document in every case.
            in.close();
        }
    }

    /**
     * Entry point: runs the conversion and prints the stack trace of any
     * {@link IOException} raised by it.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        try {
            canExtractImage();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
其中org.apache.poi.xwpf.converter需要扩展包
如果你的项目用到了maven做如下配置即可,如果没用maven,请从本文附件下载
1.0.4 对应的是 poi 3.9
1.0.0 对应的是 poi 3.8
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
所需jar包
Xml代码
<dependencies>
    <!-- XDocReport XWPF converter core (org.apache.poi.xwpf.converter.core.*) -->
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
        <version>1.0.4</version>
    </dependency>
    <!-- XDocReport XWPF to XHTML converter (org.apache.poi.xwpf.converter.xhtml.*) -->
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
        <version>1.0.4</version>
    </dependency>
</dependencies>
如果报错:
java.lang.ClassNotFoundException: org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTSectPrImpl$1HeaderReferenceList
请添加 ooxml-schemas-1.1.jar
java.lang.ClassNotFoundException: org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTBodyImpl$1TblList
也是需要 ooxml-schemas-1.1.jar
用maven的会自动下来,没用maven的请从本文附件下载ooxml-schemas-1.1.rar,需要解压
不过,发现转换后的table没有边框,有待解决
http://poi.apache.org/
Java代码
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
//import org.junit.Assert;
//import org.junit.Test;
/**
 * Example program that converts a Word 2007+ ({@code .docx}) document to XHTML
 * using Apache POI's {@link XWPFDocument} together with the XDocReport
 * {@link XHTMLConverter}.
 */
public class word07toHtml {

    /**
     * Converts {@code d:/test/test.docx} to {@code d:/test/test.htm}.
     *
     * <p>Images are handled through a {@link FileImageExtractor} and a
     * {@link FileURIResolver} that both use {@code d:/test/media} as their base
     * folder. If the source file is missing, or its name does not end with
     * {@code .docx} (compared case-insensitively), a message is printed and
     * nothing is converted.
     *
     * @throws IOException if the document cannot be read or the HTML file
     *                     cannot be written
     */
    //@Test
    public static void canExtractImage() throws IOException {
        File f = new File("d:/test/test.docx");
        if (!f.exists()) {
            System.out.println("Sorry File does not Exists!");
            return;
        }

        // Case-insensitive suffix check so that e.g. "Test.Docx" is accepted too.
        // regionMatches returns false (instead of throwing) when the name is
        // shorter than the suffix and the offset becomes negative.
        String name = f.getName();
        String suffix = ".docx";
        boolean isDocx = name.regionMatches(true, name.length() - suffix.length(),
                suffix, 0, suffix.length());
        if (!isDocx) {
            System.out.println("Enter only MS Office 2007+ files");
            return;
        }

        // 1) Load DOCX into XWPFDocument
        InputStream in = new FileInputStream(f);
        try {
            XWPFDocument document = new XWPFDocument(in);

            // 2) Prepare XHTML options: the same folder is used to extract the
            //    images and to resolve their URIs in the generated markup.
            File imageFolderFile = new File("d:/test/media");
            XHTMLOptions options = XHTMLOptions.create().URIResolver(
                    new FileURIResolver(imageFolderFile));
            options.setExtractor(new FileImageExtractor(imageFolderFile));
            // Optional settings, left disabled in this example:
            //options.setIgnoreStylesIfUnused(false);
            //options.setFragment(true);

            // 3) Convert XWPFDocument to XHTML
            OutputStream out = new FileOutputStream(new File("d:/test/test.htm"));
            try {
                XHTMLConverter.getInstance().convert(document, out, options);
            } finally {
                // Always close the output so buffered data is flushed and the
                // file handle is released, even if the conversion fails.
                out.close();
            }
        } finally {
            // Release the handle on the source document in every case.
            in.close();
        }
    }

    /**
     * Entry point: runs the conversion and prints the stack trace of any
     * {@link IOException} raised by it.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) {
        try {
            canExtractImage();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
其中org.apache.poi.xwpf.converter需要扩展包
如果你的项目用到了maven做如下配置即可,如果没用maven,请从本文附件下载
1.0.4 对应的是 poi 3.9
1.0.0 对应的是 poi 3.8
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
所需jar包
Xml代码
<dependencies>
    <!-- XDocReport XWPF converter core (org.apache.poi.xwpf.converter.core.*) -->
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
        <version>1.0.4</version>
    </dependency>
    <!-- XDocReport XWPF to XHTML converter (org.apache.poi.xwpf.converter.xhtml.*) -->
    <dependency>
        <groupId>fr.opensagres.xdocreport</groupId>
        <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
        <version>1.0.4</version>
    </dependency>
</dependencies>
如果报错:
java.lang.ClassNotFoundException: org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTSectPrImpl$1HeaderReferenceList
请添加 ooxml-schemas-1.1.jar
java.lang.ClassNotFoundException: org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTBodyImpl$1TblList
也是需要 ooxml-schemas-1.1.jar
用maven的会自动下来,没用maven的请从本文附件下载ooxml-schemas-1.1.rar,需要解压
不过,发现转换后的table没有边框,有待解决
相关文章推荐
- POI 将Excle2003,Excle2007,word2003,word2007转换为html
- java通过Apache开源框架POI读取Word2003和Word2007转换成html的demo实例
- html标签mailto的六则应用技巧
- xml应用(1):使用xmldom在服务器端生成静态html页面
- C#运用正则表达式智能获取html模版页中模版信息的应用
- datagrid的几点简单应用,功能包括radio的单选,checkbox的多选,鼠标移动到行上颜色变深,链接传递多参数等功能。这里radio和checkbox都使用的是HTML控件。
- POI 简介及简单应用
- AJAX在静态HTML页面中实现权限控制的应用
- 一句话去掉HTML标记!(正则的应用)
- 视频下载:HTML基础及应用
- html单选框radio的简单应用
- [导入]xml应用(1):使用xmldom在服务器端生成静态html页面
- 编写 "纯HTML" jsp应用--学会使用 JSTL
- PEAR::HTML_QuickForm与Smarty 的结合应用
- 基于B/S体系结构开发应用系统 (转载http://lw.mumayi.com/htmldata/96/100/2005_11/article-33794_1.html)
- 编写 "纯HTML" jsp应用--学会使用 JSTL
- 编写 "纯HTML" jsp应用--学会使用 JSTL
- Struts源码研究 - html-Cancel标签应用注意事项
- 如何显示或隐藏导航栏treeview(frame在html中的应用)
- HTML教程-HTML技巧-层的高级应用