您的位置:首页 > Web前端 > HTML

POI应用:Excel、Word转HTML

2013-11-23 19:01 435 查看


package com.acdm.util;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.OutputStream;

import java.io.OutputStreamWriter;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.model.PicturesTable;

import org.apache.poi.hwpf.usermodel.CharacterRun;

import org.apache.poi.hwpf.usermodel.Picture;

import org.apache.poi.hwpf.usermodel.Range;

import org.apache.poi.hwpf.extractor.WordExtractor;

import org.apache.poi.hwpf.usermodel.Paragraph;   

import org.apache.poi.hwpf.usermodel.Table;   

import org.apache.poi.hwpf.usermodel.TableCell;   

import org.apache.poi.hwpf.usermodel.TableIterator;   

import org.apache.poi.hwpf.usermodel.TableRow;  

public class WordExcelToHtml {

 /** ASCII code of the carriage-return character. */
 private static final short ENTER_ASCII = 13;

 /** ASCII code of the space character. */
 private static final short SPACE_ASCII = 32;

 /** ASCII code of the horizontal-tab character. */
 private static final short TABULATION_ASCII = 9;

 // HTML document under construction; built by getWordAndStyle, appended to by readPicture.
 public static String htmlText = "";

 // HTML of the table most recently rendered by readTable.
 public static String htmlTextTbl = "";

 // Index of the last table recorded by readTable; readTable sets it to -1 before iterating.
 public static int counter=0;

 // Start offset of the table most recently visited by readTable.
 public static int beginPosi=0;

 // End offset of the table most recently visited by readTable.
 public static int endPosi=0;

 // Start offset of each table, indexed in the order readTable encountered them.
 public static int beginArray[];

 // End offset of each table, parallel to beginArray.
 public static int endArray[];

 // Rendered HTML of each table, parallel to beginArray.
 public static String htmlTextArray[];

 // Set to true by readTable as soon as it finds a table.
 public static boolean tblExist=false;

 

 // Sample input path; not referenced by any active code in this class.
 public static final String inputFile="f://22.doc";

 public static void main(String argv[])

 {  

  try {

   //getWordAndStyle(inputFile);

  } catch (Exception e) {

   // TODO Auto-generated catch block

   e.printStackTrace();

  }

 }

 

 /**

  * 读取每个文字样式

  * 

  * @param fileName

  * @throws Exception

  */

 

 public static void getWordAndStyle(String fileName,String path,String path2) throws Exception {

  FileInputStream in = new FileInputStream(new File(fileName));

  HWPFDocument doc = new HWPFDocument(in);

  

      Range rangetbl = doc.getRange();//得到文档的读取范围   

   TableIterator it = new TableIterator(rangetbl); 

   int num=100;   

  

   

   beginArray=new int[num];

   endArray=new int[num];

   htmlTextArray=new String[num];

   

   

   

   

  

  // 取得文档中字符的总数

  int length = doc.characterLength();

  // 创建图片容器

  PicturesTable pTable = doc.getPicturesTable();

        

  htmlText = "<html><head><title>" + doc.getSummaryInformation().getTitle() + "</title></head><body>";

  // 创建临时字符串,好加以判断一串字符是否存在相同格式

  

   if(it.hasNext())

   {

    readTable(it,rangetbl);

   }

   

   int cur=0;

    

  String tempString = "";

  for (int i = 0; i < length - 1; i++) {

   // 整篇文章的字符通过一个个字符的来判断,range为得到文档的范围

   Range range = new Range(i, i + 1, doc);

   

   

   

   CharacterRun cr = range.getCharacterRun(0); 

   //beginArray=new int[num];

    //endArray=new int[num];

    //htmlTextArray=new String[num];

   if(tblExist)

   {

    if(i==beginArray[cur])

    {   

     htmlText+=tempString+htmlTextArray[cur];

     tempString="";

     i=endArray[cur]-1;

     cur++;

     continue;

    }

   }

   if (pTable.hasPicture(cr)) {

    htmlText +=  tempString ;    

    // 读写图片    

    readPicture(pTable, cr,path2);

    tempString = "";    

   } 

   else {

       

    Range range2 = new Range(i + 1, i + 2, doc);

    // 第二个字符

    CharacterRun cr2 = range2.getCharacterRun(0);

    char c = cr.text().charAt(0);

    

    System.out.println(i+"::"+range.getEndOffset()+"::"+range.getStartOffset()+"::"+c);

    

    // 判断是否为回车符

    if (c == ENTER_ASCII)

     {

     tempString += "<br/>";

     

     }

    // 判断是否为空格符

    else if (c == SPACE_ASCII)

     tempString += " ";

    // 判断是否为水平制表符

    else if (c == TABULATION_ASCII)

     tempString += "    ";

    // 比较前后2个字符是否具有相同的格式

    boolean flag = compareCharStyle(cr, cr2);

    if (flag)

     tempString += cr.text();

    else {

     String fontStyle = "<span style="+"font-family:"+" + cr.getFontName() + "+";font-size:" + cr.getFontSize() / 2 + "pt;";

         

     if (cr.isBold())

      fontStyle += "font-weight:bold;";

     if (cr.isItalic())

      fontStyle += "font-style:italic;";

     

       htmlText += fontStyle + "mce_style="+"font-family:"+" + cr.getFontName() + "+";font-size:" + cr.getFontSize() / 2 + "pt;";    

     if (cr.isBold())

      fontStyle += "font-weight:bold;";

     if (cr.isItalic())

      fontStyle += "font-style:italic;";

     

     htmlText += fontStyle + ">" + tempString + cr.text()+"</span> " ;

      

     tempString = "";

    }

   }

  }

  htmlText += tempString+"</body></html>";

  writeFile(htmlText,path);

 }

 

 /**

  * 读写文档中的表格

  * 

  * @param pTable

  * @param cr

  * @throws Exception

  */

 public static void readTable(TableIterator it, Range rangetbl) throws Exception {

  htmlTextTbl="";

      //迭代文档中的表格  

  

        counter=-1;

        while (it.hasNext()) 

        { 

         tblExist=true;

          htmlTextTbl="";

          Table tb = (Table) it.next();    

          beginPosi=tb.getStartOffset() ;

          endPosi=tb.getEndOffset();

          

          System.out.println("............"+beginPosi+"...."+endPosi);

          counter=counter+1;

          //迭代行,默认从0开始

          beginArray[counter]=beginPosi;

          endArray[counter]=endPosi;

          

          htmlTextTbl+="<table border>";

       for (int i = 0; i < tb.numRows(); i++) {      

    TableRow tr = tb.getRow(i);   

    

    htmlTextTbl+="<tr>";

    //迭代列,默认从0开始   

    for (int j = 0; j < tr.numCells(); j++) {      

     TableCell td = tr.getCell(j);//取得单元格

     int cellWidth=td.getWidth();

     

     //取得单元格的内容   

     for(int k=0;k<td.numParagraphs();k++){      

                Paragraph para =td.getParagraph(k);      

                String s = para.text().toString().trim();   

                if(s=="")

                {

                 s=" ";

                }

                System.out.println(s);   

                htmlTextTbl += "<td width="+cellWidth+ ">"+s+"</td>";

                System.out.println(i+":"+j+":"+cellWidth+":"+s);

           } //end for       

        }   //end for   

     }   //end for   

      htmlTextTbl+="</table>" ;    

      htmlTextArray[counter]=htmlTextTbl;

  

        } //end while 

 } 

 

 /**

  * 读写文档中的图片

  * 

  * @param pTable

  * @param cr

  * @throws Exception

  */

 public static void readPicture(PicturesTable pTable, CharacterRun cr,String path2) throws Exception {

  // 提取图片

  Picture pic = pTable.extractPicture(cr, false);

  // 返回POI建议的图片文件名

  String afileName = pic.suggestFullFileName();

  OutputStream out = new FileOutputStream(new File(path2 + File.separator + afileName));

  pic.writeImageContent(out);

  htmlText += "<img src='"+path2 +"\\"+afileName + "' mce_src='"+path2 +"\\"+ afileName + "'/>";

 }

 public static boolean compareCharStyle(CharacterRun cr1, CharacterRun cr2) 

 {

  boolean flag = false;

  if (cr1.isBold() == cr2.isBold() && cr1.isItalic() == cr2.isItalic() && cr1.getFontName().equals(cr2.getFontName()) && cr1.getFontSize() == cr2.getFontSize()) 

  {

   flag = true;

  }

  return flag;

 }

 

 /**

  * 写文件

  * 

  * @param s

  */

 public static void writeFile(String s,String path) {

  FileOutputStream fos = null;

  BufferedWriter bw = null;

  try {

   File file = new File(path);

   fos = new FileOutputStream(file);

   bw = new BufferedWriter(new OutputStreamWriter(fos));

   bw.write(s);

  } catch (FileNotFoundException fnfe) {

   fnfe.printStackTrace();

  } catch (IOException ioe) {

   ioe.printStackTrace();

  } finally {

   try {

    if (bw != null)

     bw.close();

    if (fos != null)

     fos.close();

   } catch (IOException ie) {

   }

  }

 }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: