docx、ppt、xls、pdf文件转html
2017-04-07 17:01
337 查看
场景:
后台上传的文档要在前端app上在线预览
解决办法:
将文档转成html用于前端显示
maven引入
具体代码:
1、docx转html
2、xls转html
3、ppt转html
其实只是ppt转图片,有了图片后放到页面上去显示。
4、pdf转html
原理同ppt转html一样
/**
 * Converts the PDF at {@code presentationDto.getWordPath()} into an HTML page.
 * <p>
 * Every page is rendered to a JPEG inside a uniquely named sub-folder of the
 * "upload" directory; the collected image paths are then passed to
 * {@code createHtml}, which writes the page that displays them.
 *
 * @param tempContextUrl value forwarded unchanged to {@code createHtml}
 * @return the value returned by {@code createHtml}, or 0 if loading the PDF
 *         or writing any page image fails
 */
private int pdfToHtml(String tempContextUrl) {
    int rv = 0;
    String path = presentationDto.getWordPath();
    // Everything up to and including the "upload" directory.
    String wordPath = path.substring(0, path.indexOf("upload") + 6);
    // Unique folder name that receives this document's page images.
    String folderName = UidUtil.generateUUID();
    List<String> imgList = new ArrayList<String>();
    PDDocument doc = null;
    try {
        doc = PDDocument.load(path);
        List pages = doc.getDocumentCatalog().getAllPages();
        // Create the output folder once instead of on every iteration.
        File folder = new File(wordPath + File.separator + folderName);
        folder.mkdirs();
        for (int i = 0; i < pages.size(); i++) {
            PDPage page = (PDPage) pages.get(i);
            BufferedImage image = page.convertToImage();
            String imgName = File.separator + folderName + File.separator + i + ".jpg";
            File outFile = new File(wordPath + imgName);

            Iterator iter = ImageIO.getImageWritersBySuffix("jpg");
            if (!iter.hasNext()) {
                throw new IOException("No ImageIO writer registered for suffix jpg");
            }
            ImageWriter writer = (ImageWriter) iter.next();
            try {
                FileOutputStream out = new FileOutputStream(outFile);
                try {
                    ImageOutputStream outImage = ImageIO.createImageOutputStream(out);
                    if (outImage == null) {
                        throw new IOException("Cannot create image output stream for " + outFile);
                    }
                    try {
                        writer.setOutput(outImage);
                        writer.write(new IIOImage(image, null, null));
                    } finally {
                        // Closing pushes any cached image data down to the file stream.
                        outImage.close();
                    }
                } finally {
                    out.close();
                }
            } finally {
                writer.dispose();
            }
            // Only list the image once it has been written completely.
            imgList.add(File.separator + "upload" + imgName);
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return rv;
    } catch (IOException e) {
        e.printStackTrace();
        return rv;
    } finally {
        if (doc != null) {
            try {
                doc.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    // Propagate the outcome of the HTML generation instead of always reporting success.
    rv = createHtml(wordPath, imgList, tempContextUrl);
    return rv;
}
后台上传的文档要在前端app上在线预览
解决办法:
将文档转成html用于前端显示
maven引入
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.5</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.9</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.9</version>
</dependency>
具体代码:
1、docx转html
/** * docx文件转html * @param tempContextUrl 项目访问名 * @return */ public int Word2007ToHtml(String tempContextUrl) { int rv = 0; try { String path = presentationDto.getWordPath(); //word路径 String wordPath = path.substring(0, path.indexOf("upload")+6); //word文件名 String wordName = path.substring(path.lastIndexOf(File.separator)+1,path.lastIndexOf(".")); //后缀 String suffix = path.substring(path.lastIndexOf(".")); //生成html路径 String htmlPath = wordPath + File.separator + System.currentTimeMillis() + "_show" + File.separator; //生成html文件名 String htmlName = System.currentTimeMillis() + ".html"; //图片路径 String imagePath = htmlPath + "image" + File.separator; //判断html文件是否存在 File htmlFile = new File(htmlPath + htmlName); //word文件 File wordFile = new File(wordPath + File.separator + wordName + suffix); // 1) 加载word文档生成 XWPFDocument对象 InputStream in = new FileInputStream(wordFile); XWPFDocument document = new XWPFDocument(in); // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录) File imgFolder = new File(imagePath); XHTMLOptions options = XHTMLOptions.create(); options.setExtractor(new FileImageExtractor(imgFolder)); //html中图片的路径 相对路径 options.URIResolver(new BasicURIResolver("image")); options.setIgnoreStylesIfUnused(false); options.setFragment(true); // 3) 将 XWPFDocument转换成XHTML //生成html文件上级文件夹 File folder = new File(htmlPath); if(!folder.exists()){ folder.mkdirs(); } OutputStream out = new FileOutputStream(htmlFile); XHTMLConverter.getInstance().convert(document, out, options); // 4) 转换为项目访问路径 String absolutePath = htmlFile.getAbsolutePath(); htmlPath = tempContextUrl + absolutePath.substring(absolutePath.indexOf("upload")); presentationDto.setHtmlPath(htmlPath); } catch (FileNotFoundException e) { e.printStackTrace(); return rv; } catch (XWPFConverterException e) { e.printStackTrace(); return rv; } catch (IOException e) { e.printStackTrace(); return rv; } rv = 1; return rv; }
2、xls转html
private int xlsToHtml(String tempContextUrl){ int rv = 0; String path = presentationDto.getWordPath(); //word路径 String wordPath = path.substring(0, path.indexOf("upload")+6) + File.separator; //word文件名 String wordName = path.substring(path.lastIndexOf(File.separator)+1); try { InputStream input=new FileInputStream(wordPath+wordName); HSSFWorkbook excelBook=new HSSFWorkbook(input); ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter (DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument() ); excelToHtmlConverter.processWorkbook(excelBook); List pics = excelBook.getAllPictures(); if (pics != null) { for (int i = 0; i < pics.size(); i++) { Picture pic = (Picture) pics.get (i); try { pic.writeImageContent (new FileOutputStream (wordPath + pic.suggestFullFileName() ) ); } catch (FileNotFoundException e) { e.printStackTrace(); } } } Document htmlDocument =excelToHtmlConverter.getDocument(); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); DOMSource domSource = new DOMSource (htmlDocument); StreamResult streamResult = new StreamResult (outStream); TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty (OutputKeys.ENCODING, "utf-8"); serializer.setOutputProperty (OutputKeys.INDENT, "yes"); serializer.setOutputProperty (OutputKeys.METHOD, "html"); serializer.transform (domSource, streamResult); outStream.close(); String content = new String (outStream.toByteArray(),"utf-8"); String uuid = UidUtil.generateUUID(); FileUtils.writeStringToFile(new File(wordPath, uuid+".html"), content, "utf-8"); presentationDto.setHtmlPath(tempContextUrl + "upload" + File.separator + uuid+".html"); } catch (Exception e) { e.printStackTrace(); return rv; } rv = 1; return rv; }
3、ppt转html
其实只是ppt转图片,有了图片后放到页面上去显示。
/** * ppt转html * @param tempContextUrl * @return */ private int pptToHtml(String tempContextUrl){ int rv = 0; String path = presentationDto.getWordPath(); //word路径 String wordPath = path.substring(0, path.indexOf("upload")+6); //文件夹名 String folderName = UidUtil.generateUUID(); List<String> imgList = new ArrayList<String>(); File file = new File(path); File folder = new File(wordPath + File.separator + folderName); try { folder.mkdirs(); FileInputStream is = new FileInputStream(file); SlideShow ppt = new SlideShow(is); is.close(); Dimension pgsize = ppt.getPageSize(); org.apache.poi.hslf.model.Slide[] slide = ppt.getSlides(); for (int i = 0; i < slide.length; i++) { TextRun[] truns = slide[i].getTextRuns(); for ( int k=0;k<truns.length;k++){ RichTextRun[] rtruns = truns[k].getRichTextRuns(); for(int l=0;l<rtruns.length;l++){ rtruns[l].setFontIndex(1); rtruns[l].setFontName("宋体"); } } BufferedImage img = new BufferedImage(pgsize.width,pgsize.height, BufferedImage.TYPE_INT_RGB); Graphics2D graphics = img.createGraphics(); graphics.setPaint(Color.BLUE); graphics.fill(new Rectangle2D.Float(0, 0, pgsize.width, pgsize.height)); slide[i].draw(graphics); // 这里设置图片的存放路径和图片的格式(jpeg,png,bmp等等),注意生成文件路径 String imgName = File.separator + folderName + File.separator +"pict_"+ (i + 1) + ".jpeg"; FileOutputStream out = new FileOutputStream(wordPath + imgName); javax.imageio.ImageIO.write(img, "jpeg", out); out.close(); imgList.add(File.separator + "upload" + imgName); } } catch (FileNotFoundException e) { e.printStackTrace(); return rv; } catch (IOException e) { e.printStackTrace(); return rv; } rv = createHtml(wordPath,imgList, tempContextUrl); return rv; } /** * ppt转html时生成html * @param wordPath upload根目录 * @param imgList 所有幻灯片路径 * @param tempContextUrl 项目访问路径 * @return */ private int createHtml(String wordPath,List<String> imgList,String tempContextUrl){ int rv = 0; StringBuilder sb = new StringBuilder("<!doctype html><html><h 9c6f ead><meta 
charset='utf-8'><title>无标题文档</title></head><body>"); if (imgList != null && !imgList.isEmpty()) { for (String img : imgList) { sb.append("<img src='" + img + "' /><br>"); } } sb.append("</body></html>"); String uuid = UidUtil.generateUUID(); try { File file = new File(wordPath + File.separator + uuid + ".html"); BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8")); bufferedWriter.write(sb.toString()); bufferedWriter.close(); } catch (IOException e) { e.printStackTrace(); return rv; } presentationDto.setHtmlPath(tempContextUrl + "upload" + File.separator + uuid+".html"); return 1; }
4、pdf转html
原理同ppt转html一样
/**
 * Converts the PDF at {@code presentationDto.getWordPath()} into an HTML page.
 * <p>
 * Every page is rendered to a JPEG inside a uniquely named sub-folder of the
 * "upload" directory; the collected image paths are then passed to
 * {@code createHtml}, which writes the page that displays them.
 *
 * @param tempContextUrl value forwarded unchanged to {@code createHtml}
 * @return the value returned by {@code createHtml}, or 0 if loading the PDF
 *         or writing any page image fails
 */
private int pdfToHtml(String tempContextUrl) {
    int rv = 0;
    String path = presentationDto.getWordPath();
    // Everything up to and including the "upload" directory.
    String wordPath = path.substring(0, path.indexOf("upload") + 6);
    // Unique folder name that receives this document's page images.
    String folderName = UidUtil.generateUUID();
    List<String> imgList = new ArrayList<String>();
    PDDocument doc = null;
    try {
        doc = PDDocument.load(path);
        List pages = doc.getDocumentCatalog().getAllPages();
        // Create the output folder once instead of on every iteration.
        File folder = new File(wordPath + File.separator + folderName);
        folder.mkdirs();
        for (int i = 0; i < pages.size(); i++) {
            PDPage page = (PDPage) pages.get(i);
            BufferedImage image = page.convertToImage();
            String imgName = File.separator + folderName + File.separator + i + ".jpg";
            File outFile = new File(wordPath + imgName);

            Iterator iter = ImageIO.getImageWritersBySuffix("jpg");
            if (!iter.hasNext()) {
                throw new IOException("No ImageIO writer registered for suffix jpg");
            }
            ImageWriter writer = (ImageWriter) iter.next();
            try {
                FileOutputStream out = new FileOutputStream(outFile);
                try {
                    ImageOutputStream outImage = ImageIO.createImageOutputStream(out);
                    if (outImage == null) {
                        throw new IOException("Cannot create image output stream for " + outFile);
                    }
                    try {
                        writer.setOutput(outImage);
                        writer.write(new IIOImage(image, null, null));
                    } finally {
                        // Closing pushes any cached image data down to the file stream.
                        outImage.close();
                    }
                } finally {
                    out.close();
                }
            } finally {
                writer.dispose();
            }
            // Only list the image once it has been written completely.
            imgList.add(File.separator + "upload" + imgName);
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return rv;
    } catch (IOException e) {
        e.printStackTrace();
        return rv;
    } finally {
        if (doc != null) {
            try {
                doc.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    // Propagate the outcome of the HTML generation instead of always reporting success.
    rv = createHtml(wordPath, imgList, tempContextUrl);
    return rv;
}
相关文章推荐
- lecene.net实现pdf,doc,xls,ppt,htm,html等格式文件的检索
- Android中pdf,doc,docx,xls,xlsx,ppt,pptx等office文件预览
- 使用POI将office(doc/docx/ppt/pptx/xls/xlsx)文件转html格式(附带源码)
- Java 使用jacob ppt文件转pptx,doc转docx;word 转html、pdf等
- lucene实现pdf,doc,xls,ppt,htm,html等格式文件的检索
- lecene.net实现pdf,doc,xls,ppt,htm,html等格式文件的检索
- "doc", "docx", "xls", "xlsx", "ppt", "pptx",txt。等文件转化为pdf
- 【Android】Android阅读各种格式文件(包括DOC/DOCX/WPS/XLS/XLSX/PPT/PPTX/TXT/PDF)
- lecene.net实现pdf,doc,xls,ppt,htm,html等格式文件的检索
- 【ASP.NET 进阶】仿百度文库文档在线预览(支持格式.pdf,.doc,docx,xls,xlsx,.ppt,pptx)
- excel(xls),doc,ppt,xml.zip,rar,pdf,jpeg等各种文件格式相关开源项目介绍
- android用于打开各种文件的intent,包括以下文件PDF,PPT,WORD,EXCEL,CHM,HTML,TEXT,AUDIO,VIDEO
- Android打开 解析 doc,docx,xls,xlsx,ppt等格式文件的源码
- iOS 打开html、txt、PDF、PPT等文件
- ios开发之使用UIWebView打开本地的HTML、txt、PDF、PPT文件、打开远程的共享资源
- Android执行打开文件(PDF,PPT,WORD,EXCEL,CHM,HTML,TEXT,AUDIO,VIDEO)的总结
- openoffic+java+spring 多线程 转换doc,ppt,xls -> html/pdf
- 将pdf、doc、docx、ppt格式文件转化swf格式文件的方法
- android 打开 ppt pptx doc docx xls xlsx pdf office 文档
- 直接在浏览器打开pdf文件,ppt文件,doc文件,xls文件,rar文件,zip文件等