POI3.8读取WORD(简洁实例)
2013-11-23 16:56
411 查看
所需依赖(jar 包):
poi-3.8-20120326.jar
poi-examples-3.8-20120326.jar
poi-excelant-3.8-20120326.jar
poi-ooxml-3.8-20120326.jar
poi-ooxml-schemas-3.8-20120326.jar
poi-scratchpad-3.8-20120326.jar
Java代码
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
public class WordDemo extends HttpServlet {
private static final long serialVersionUID = 1L;
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
this.doPost(request, response);
}
public void doPost(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
//从硬盘读取一个doc文档
InputStream in = new FileInputStream("F:\\test.doc");
//类从word文档中提取文本,非特殊情况下,都将使用getParagraphText()与getText()
WordExtractor word = new WordExtractor(in);
//获取段文本
String [] strArray = word.getParagraphText();
String str = word.getText();
for(int i=0 ; i<strArray.length ; i++){
System.out.println(strArray[i]+"\ti循环:"+i);
}
System.out.println(str +"\t --");
//这个构造函数从InputStream中加载Word文档。
HWPFDocument doc = new HWPFDocument((InputStream)new FileInputStream("F:\\test.doc"));
//这个类为HWPF对象模型,对文档范围段操作
Range range = doc.getRange(); //
//看看此文档有多少个段落
int num = range.numParagraphs();
System.out.println(num+"段");
//得到word数据流
byte [] dataStream = doc.getDataStream();
System.out.println("数据流长度:"+dataStream.length);
//用于在一段范围内获得段落数
int numChar = range.numCharacterRuns();
System.out.println("CharacterRuns 数:"+numChar);
//负责图像提取 和 确定一些文件某块是否包含嵌入的图像。
PicturesTable table = new PicturesTable(doc, dataStream, null);
for(int j=0 ; j<numChar ; j++){
//这个类表示一个文本运行,有着共同的属性。
CharacterRun run = range.getCharacterRun(j);
//是否存在图片
boolean bool = table.hasPicture(run);
System.out.println("是否存在图片:"+bool);
if(bool){
//返回图片对象绑定到指定的CharacterRun
Picture pic = table.extractPicture(run, true);
//图片的内容字节写入到指定的输出流。
pic.writeImageContent(new FileOutputStream("F:\\"+j+".bmp"));
System.out.println("成功提取图片"+j+":");
}
}
request.getRequestDispatcher("ok.jsp").forward(request, response);
}
}
相关文章推荐
- 解析javascript变量
- struts2 相关知识
- Leetcode: Length of Last word
- 深度理解链式前向星
- 用stackedit保存笔记
- 【Linux_Fedora_应用系列】_5_如何安装XZ Utils 解压缩工具以及利用 xz工具来解压缩.xz文件
- 顺时针打印矩阵
- POI 2000 ------Stripes
- iPhone界面跳转
- LSA学习笔记
- 图的邻接矩阵和邻接表表示法
- 二叉树解析实现逆波兰公式算法
- CODE 133: Single Number
- jquery下error报错详解
- restlet中如何获取post方式提交的表单值
- Brief Intro to Blocks 4:Memory Management with Blocks
- android 实现静默安装、卸载
- 《Head First设计模式》学习笔记1
- java序列化与反序列化
- 合成模式(Composite Pattern)