您的位置:首页 > Web前端 > HTML

docx、ppt、xls、pdf文件转html

2017-04-07 17:01 337 查看
场景:

后台上传的文档要在前端app上在线预览

解决办法:

将文档转成html用于前端显示

maven引入
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.xdocreport.document</artifactId>
<version>1.0.5</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.5</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.9</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.9</version>
</dependency>


具体代码:

1、docx转html

/**
 * Converts the .docx file referenced by {@code presentationDto.getWordPath()} to an
 * HTML fragment and stores the resulting access path via
 * {@code presentationDto.setHtmlPath(...)}.
 *
 * <p>The HTML is written to {@code <upload-root>/<millis>_show/<millis>.html}; embedded
 * images are extracted into an {@code image} sub-folder next to it and referenced from
 * the HTML with the relative prefix {@code image}.
 *
 * @param tempContextUrl project access URL prefix; prepended to the part of the generated
 *                       file's absolute path that starts at "upload"
 * @return 1 on success, 0 if the source file is missing, the conversion fails, or an
 *         I/O error occurs
 */
public int Word2007ToHtml(String tempContextUrl) {
    int rv = 0;
    try {
        String path = presentationDto.getWordPath();
        // Upload root: everything up to and including the first "upload" in the path.
        String wordPath = path.substring(0, path.indexOf("upload") + 6);
        // File name without extension.
        String wordName = path.substring(path.lastIndexOf(File.separator) + 1, path.lastIndexOf("."));
        // Extension, including the leading dot.
        String suffix = path.substring(path.lastIndexOf("."));
        // Folder that receives the generated HTML.
        String htmlPath = wordPath + File.separator + System.currentTimeMillis() + "_show" + File.separator;
        // Generated HTML file name.
        String htmlName = System.currentTimeMillis() + ".html";
        // Folder that receives the extracted images.
        String imagePath = htmlPath + "image" + File.separator;

        File htmlFile = new File(htmlPath + htmlName);

        // The source document is looked up directly under the upload root.
        File wordFile = new File(wordPath + File.separator + wordName + suffix);

        // 1) Load the Word document into an XWPFDocument.
        InputStream in = new FileInputStream(wordFile);
        try {
            XWPFDocument document = new XWPFDocument(in);

            // 2) Configure the XHTML conversion (image extraction folder and image URI prefix).
            File imgFolder = new File(imagePath);
            XHTMLOptions options = XHTMLOptions.create();
            options.setExtractor(new FileImageExtractor(imgFolder));
            // Image paths inside the HTML are relative to the HTML file.
            options.URIResolver(new BasicURIResolver("image"));
            options.setIgnoreStylesIfUnused(false);
            options.setFragment(true);

            // 3) Convert the XWPFDocument to XHTML; create the parent folder first.
            File folder = new File(htmlPath);
            if (!folder.exists()) {
                folder.mkdirs();
            }
            OutputStream out = new FileOutputStream(htmlFile);
            try {
                XHTMLConverter.getInstance().convert(document, out, options);
            } finally {
                // Always release the output handle so the file is flushed and unlocked.
                out.close();
            }
        } finally {
            // Always release the input handle, even when loading or conversion fails.
            in.close();
        }

        // 4) Translate the file system path into the project access path.
        String absolutePath = htmlFile.getAbsolutePath();
        htmlPath = tempContextUrl + absolutePath.substring(absolutePath.indexOf("upload"));
        presentationDto.setHtmlPath(htmlPath);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return rv;
    } catch (XWPFConverterException e) {
        e.printStackTrace();
        return rv;
    } catch (IOException e) {
        e.printStackTrace();
        return rv;
    }
    rv = 1;
    return rv;
}


2、xls转html

/**
 * Converts the .xls workbook referenced by {@code presentationDto.getWordPath()} to an
 * HTML file in the upload root and stores the resulting access path via
 * {@code presentationDto.setHtmlPath(...)}.
 *
 * <p>The workbook is looked up directly under the upload root (the path prefix up to and
 * including the first "upload"). The HTML is serialized as UTF-8 into
 * {@code <upload-root>/<uuid>.html}.
 *
 * @param tempContextUrl project access URL prefix; prepended to {@code "upload" + File.separator + <uuid>.html}
 * @return 1 on success, 0 if any exception is raised during conversion
 */
private int xlsToHtml(String tempContextUrl){
    int rv = 0;
    String path = presentationDto.getWordPath();
    // Upload root (with trailing separator).
    String wordPath = path.substring(0, path.indexOf("upload") + 6) + File.separator;
    // File name including extension.
    String wordName = path.substring(path.lastIndexOf(File.separator) + 1);
    try {
        HSSFWorkbook excelBook;
        InputStream input = new FileInputStream(wordPath + wordName);
        try {
            excelBook = new HSSFWorkbook(input);
        } finally {
            // The workbook is fully loaded by the constructor; release the file handle.
            input.close();
        }

        ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        excelToHtmlConverter.processWorkbook(excelBook);

        // NOTE(review): HSSFWorkbook.getAllPictures() is documented to return
        // HSSFPictureData elements; confirm that the cast to Picture below is valid
        // for the Picture type imported by this file.
        List<?> pics = excelBook.getAllPictures();
        if (pics != null) {
            for (int i = 0; i < pics.size(); i++) {
                Picture pic = (Picture) pics.get(i);
                try {
                    FileOutputStream picOut = new FileOutputStream(wordPath + pic.suggestFullFileName());
                    try {
                        pic.writeImageContent(picOut);
                    } finally {
                        // Close each picture file so handles do not accumulate.
                        picOut.close();
                    }
                } catch (FileNotFoundException e) {
                    // Best effort: a picture that cannot be created does not abort the conversion.
                    e.printStackTrace();
                }
            }
        }

        // Serialize the generated DOM as an indented UTF-8 HTML document.
        Document htmlDocument = excelToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(outStream);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);

        String content = new String(outStream.toByteArray(), "utf-8");

        String uuid = UidUtil.generateUUID();
        FileUtils.writeStringToFile(new File(wordPath, uuid + ".html"), content, "utf-8");
        presentationDto.setHtmlPath(tempContextUrl + "upload" + File.separator + uuid + ".html");
    } catch (Exception e) {
        e.printStackTrace();
        return rv;
    }
    rv = 1;
    return rv;
}


3、ppt转html

其实只是ppt转图片,有了图片后放到页面上去显示。

/**
 * Renders every slide of the .ppt file referenced by
 * {@code presentationDto.getWordPath()} to a JPEG image and then delegates to
 * {@code createHtml} to build an HTML page that shows the images in order.
 *
 * <p>Images are written to {@code <upload-root>/<uuid>/pict_<n>.jpeg}, with {@code n}
 * starting at 1.
 *
 * @param tempContextUrl project access URL prefix, passed through to {@code createHtml}
 * @return the result of {@code createHtml} on success, 0 if the file is missing or an
 *         I/O error occurs while reading or rendering
 */
private int pptToHtml(String tempContextUrl){
    int rv = 0;
    String path = presentationDto.getWordPath();
    // Upload root: everything up to and including the first "upload" in the path.
    String wordPath = path.substring(0, path.indexOf("upload") + 6);
    // Name of the folder that receives the slide images.
    String folderName = UidUtil.generateUUID();
    List<String> imgList = new ArrayList<String>();
    File file = new File(path);

    File folder = new File(wordPath + File.separator + folderName);
    try {
        folder.mkdirs();

        SlideShow ppt;
        FileInputStream is = new FileInputStream(file);
        try {
            ppt = new SlideShow(is);
        } finally {
            // Release the file handle even when the presentation cannot be parsed.
            is.close();
        }

        Dimension pgsize = ppt.getPageSize();
        org.apache.poi.hslf.model.Slide[] slide = ppt.getSlides();
        for (int i = 0; i < slide.length; i++) {
            // Force a fixed font on every text run before rendering
            // (presumably to avoid garbled CJK text - confirm against rendering results).
            TextRun[] truns = slide[i].getTextRuns();
            for (int k = 0; k < truns.length; k++) {
                RichTextRun[] rtruns = truns[k].getRichTextRuns();
                for (int l = 0; l < rtruns.length; l++) {
                    rtruns[l].setFontIndex(1);
                    rtruns[l].setFontName("宋体");
                }
            }

            BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB);
            Graphics2D graphics = img.createGraphics();
            try {
                // Fill the background before drawing the slide on top of it.
                graphics.setPaint(Color.BLUE);
                graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height));
                slide[i].draw(graphics);
            } finally {
                // Free the native resources held by the graphics context.
                graphics.dispose();
            }

            // Image location relative to the upload root; the format is fixed to JPEG.
            String imgName = File.separator + folderName + File.separator + "pict_" + (i + 1) + ".jpeg";

            FileOutputStream out = new FileOutputStream(wordPath + imgName);
            try {
                javax.imageio.ImageIO.write(img, "jpeg", out);
            } finally {
                // Close the image file even when encoding fails.
                out.close();
            }

            imgList.add(File.separator + "upload" + imgName);
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return rv;
    } catch (IOException e) {
        e.printStackTrace();
        return rv;
    }
    rv = createHtml(wordPath, imgList, tempContextUrl);
    return rv;
}

/**
* ppt转html时生成html
* @param wordPath	upload根目录
* @param imgList	所有幻灯片路径
* @param tempContextUrl	项目访问路径
* @return
*/
private int createHtml(String wordPath,List<String> imgList,String tempContextUrl){
int rv = 0;
StringBuilder sb = new StringBuilder("<!doctype html><html><h
9c6f
ead><meta charset='utf-8'><title>无标题文档</title></head><body>");
if (imgList != null && !imgList.isEmpty()) {
for (String img : imgList) {
sb.append("<img src='" + img + "' /><br>");
}
}
sb.append("</body></html>");

String uuid = UidUtil.generateUUID();
try {
File file = new File(wordPath + File.separator + uuid + ".html");
BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8"));
bufferedWriter.write(sb.toString());
bufferedWriter.close();
} catch (IOException e) {
e.printStackTrace();
return rv;
}
presentationDto.setHtmlPath(tempContextUrl + "upload" + File.separator + uuid+".html");
return 1;
}

4、pdf转html

原理同ppt转html一样

/**
 * Renders every page of the PDF referenced by {@code presentationDto.getWordPath()} to
 * a JPEG image and then delegates to {@code createHtml} to build an HTML page that
 * shows the images in order.
 *
 * <p>Images are written to {@code <upload-root>/<uuid>/<n>.jpg}, with {@code n} starting
 * at 0.
 *
 * @param tempContextUrl project access URL prefix, passed through to {@code createHtml}
 * @return the result of {@code createHtml} on success, 0 if the file is missing or an
 *         I/O error occurs while reading or rendering
 */
private int pdfToHtml(String tempContextUrl){
    int rv = 0;
    String path = presentationDto.getWordPath();
    // Upload root: everything up to and including the first "upload" in the path.
    String wordPath = path.substring(0, path.indexOf("upload") + 6);
    // Name of the folder that receives the page images.
    String folderName = UidUtil.generateUUID();
    List<String> imgList = new ArrayList<String>();
    try {
        PDDocument doc = PDDocument.load(path);
        try {
            // Create the target folder once, before any image is written.
            File folder = new File(wordPath + File.separator + folderName);
            folder.mkdirs();

            List<?> pages = doc.getDocumentCatalog().getAllPages();
            for (int i = 0; i < pages.size(); i++) {
                PDPage page = (PDPage) pages.get(i);
                BufferedImage image = page.convertToImage();
                String imgName = File.separator + folderName + File.separator + i + ".jpg";

                Iterator<ImageWriter> iter = ImageIO.getImageWritersBySuffix("jpg");
                ImageWriter writer = iter.next();
                try {
                    FileOutputStream out = new FileOutputStream(wordPath + imgName);
                    try {
                        ImageOutputStream outImage = ImageIO.createImageOutputStream(out);
                        if (outImage == null) {
                            throw new IOException("No ImageOutputStream available for " + wordPath + imgName);
                        }
                        try {
                            writer.setOutput(outImage);
                            writer.write(new IIOImage(image, null, null));
                        } finally {
                            // Flushes buffered image data into the underlying file stream.
                            outImage.close();
                        }
                    } finally {
                        out.close();
                    }
                } finally {
                    // Release the resources held by the image writer.
                    writer.dispose();
                }

                imgList.add(File.separator + "upload" + imgName);
            }
        } finally {
            // Close the document even when rendering a page fails.
            doc.close();
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return rv;
    } catch (IOException e) {
        e.printStackTrace();
        return rv;
    }
    // Propagate the HTML generation result instead of unconditionally reporting success.
    rv = createHtml(wordPath, imgList, tempContextUrl);
    return rv;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  maven web java
相关文章推荐