您的位置:首页 > Web前端 > HTML

POI解析DOC,转换为HTML

2013-01-29 10:09 274 查看

环境

poi-3.9

代码

public class WordToHtml {

private static final String encoding = "UTF-8";

public static String convert2Html(String wordPath)
throws FileNotFoundException, TransformerException, IOException,
ParserConfigurationException {
if( wordPath == null || "".equals(wordPath) ) return "";
File file = new File(wordPath);
if( file.exists() && file.isFile() )
return convert2Html(new FileInputStream(file));
else
return "";
}

public static String convert2Html(InputStream is)
throws TransformerException, IOException,
ParserConfigurationException {
HWPFDocument wordDocument = new HWPFDocument(is);
WordToHtmlConverter converter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument());

// 添加图片前缀,以防图片重复覆盖
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");
final String prefix = sdf.format(new Date());

converter.setPicturesManager(new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType,
String suggestedName, float widthInches, float heightInches) {
return prefix + "_" + suggestedName;
}
});
converter.processDocument(wordDocument);

List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
if (pics != null) {
for(Picture pic : pics) {
try {
pic.writeImageContent(new FileOutputStream(
"/" + prefix + "_" + pic.suggestFullFileName()));
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}

StringWriter writer = new StringWriter();

Transformer serializer = TransformerFactory.newInstance().newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(
new DOMSource(converter.getDocument()),
new StreamResult(writer) );
writer.close();
return writer.toString();
}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: