您的位置：首页 > 编程语言 > Java开发

java解析xml文件

2013-06-15 10:24 281 查看

在java环境下读取xml文件的方法主要有4种：DOM、SAX、JDOM、JAXB

1. DOM(Document Object Model)

此方法主要由W3C提供，它将xml文件全部读入内存中，然后将各个元素组成一棵数据树，以便快速地访问各个节点。因此非常消耗系统资源（尤其是内存），对比较大的文档不适宜采用DOM方法来解析。DOM API 直接沿袭了 XML 规范。每个结点都实现了基于 Node 的可扩展接口，就多态性的观点来讲，它是优秀的，但是在 Java 语言中应用起来不方便，并且可读性不强。

实例：

Java代码

// XML parser interfaces (DocumentBuilderFactory, DocumentBuilder, ...)
import javax.xml.parsers.*;
// W3C DOM interfaces (Document, Element, NodeList, ...)
import org.w3c.dom.*;
// Crimson DOM implementation; only needed when writing XML files
import org.apache.crimson.tree.XmlDocument;
// Checked exceptions thrown by DocumentBuilder.parse
import java.io.IOException;
import org.xml.sax.SAXException;

// Reads an XML document with DOM and walks its <student> elements.
// fileURI and strNamespace are assumed to be defined by the surrounding code.
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// Enable namespace support
factory.setNamespaceAware(true);
// Enable validation
factory.setValidating(true);

// Obtain a DocumentBuilder. It is declared outside the try block so that it
// is still in scope when the document is parsed below.
DocumentBuilder builder = null;
try {
    builder = factory.newDocumentBuilder();
} catch (ParserConfigurationException pce) {
    // On failure print the exception and exit; the same pattern is used below
    System.err.println(pce);
    System.exit(1);
}

// Parse the document and obtain a Document instance. Declared outside the
// try block for the same scoping reason as builder.
Document doc = null;
try {
    doc = builder.parse(fileURI);
} catch (SAXException saxe) {
    // parse() reports XML parsing errors as the checked SAXException
    System.err.println(saxe.getMessage());
    System.exit(1);
} catch (IOException ioe) {
    System.err.println(ioe);
    System.exit(1);
}

// Root element (StuInfo)
Element elmtStuInfo = doc.getDocumentElement();
// All student elements in the given namespace
NodeList nlStudent = elmtStuInfo.getElementsByTagNameNS(strNamespace, "student");
for (int i = 0; i < nlStudent.getLength(); i++) {
    // Current student element
    Element elmtStudent = (Element) nlStudent.item(i);
    // The name elements below the current student
    NodeList nlCurrent = elmtStudent.getElementsByTagNameNS(strNamespace, "name");
}

// XML parser interfaces (DocumentBuilderFactory, DocumentBuilder, ...)
import javax.xml.parsers.*;
// W3C DOM interfaces (Document, Element, NodeList, ...)
import org.w3c.dom.*;
// Crimson DOM implementation; only needed when writing XML files
import org.apache.crimson.tree.XmlDocument;
// Checked exceptions thrown by DocumentBuilder.parse
import java.io.IOException;
import org.xml.sax.SAXException;

// Reads an XML document with DOM and walks its <student> elements.
// fileURI and strNamespace are assumed to be defined by the surrounding code.
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
// Enable namespace support
factory.setNamespaceAware(true);
// Enable validation
factory.setValidating(true);

// Obtain a DocumentBuilder. It is declared outside the try block so that it
// is still in scope when the document is parsed below.
DocumentBuilder builder = null;
try {
    builder = factory.newDocumentBuilder();
} catch (ParserConfigurationException pce) {
    // On failure print the exception and exit; the same pattern is used below
    System.err.println(pce);
    System.exit(1);
}

// Parse the document and obtain a Document instance. Declared outside the
// try block for the same scoping reason as builder.
Document doc = null;
try {
    doc = builder.parse(fileURI);
} catch (SAXException saxe) {
    // parse() reports XML parsing errors as the checked SAXException
    System.err.println(saxe.getMessage());
    System.exit(1);
} catch (IOException ioe) {
    System.err.println(ioe);
    System.exit(1);
}

// Root element (StuInfo)
Element elmtStuInfo = doc.getDocumentElement();
// All student elements in the given namespace
NodeList nlStudent = elmtStuInfo.getElementsByTagNameNS(strNamespace, "student");
for (int i = 0; i < nlStudent.getLength(); i++) {
    // Current student element
    Element elmtStudent = (Element) nlStudent.item(i);
    // The name elements below the current student
    NodeList nlCurrent = elmtStudent.getElementsByTagNameNS(strNamespace, "name");
}

读取的方法其实很简单，写入xml文件也同样不复杂。

Java代码

// Builds an in-memory DOM document describing one student.
// studentBean is assumed to be supplied by the surrounding code.
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = null;
try {
    builder = factory.newDocumentBuilder();
} catch (ParserConfigurationException pce) {
    // Report the configuration problem and stop
    System.err.println(pce);
    System.exit(1);
}

// Start from an empty document
Document doc = builder.newDocument();

// Root element, attached directly to the document
Element root = doc.createElement("学生花名册");
doc.appendChild(root);

// Student element with its gender attribute, attached to the root
Element student = doc.createElement("学生");
student.setAttribute("性别", studentBean.getSex());
root.appendChild(student);

// Name element: the text node is filled in first, then the element is
// attached below the student
Element name = doc.createElement("姓名");
Text tName = doc.createTextNode(studentBean.getName());
name.appendChild(tName);
student.appendChild(name);

// Age element, built the same way; the age is converted to text
Element age = doc.createElement("年龄");
Text tAge = doc.createTextNode(String.valueOf(studentBean.getAge()));
age.appendChild(tAge);
student.appendChild(age);

// Builds an in-memory DOM document describing one student.
// studentBean is assumed to be supplied by the surrounding code.
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = null;
try {
    builder = factory.newDocumentBuilder();
} catch (ParserConfigurationException pce) {
    // Report the configuration problem and stop
    System.err.println(pce);
    System.exit(1);
}

// Start from an empty document
Document doc = builder.newDocument();

// Root element, attached directly to the document
Element root = doc.createElement("学生花名册");
doc.appendChild(root);

// Student element with its gender attribute, attached to the root
Element student = doc.createElement("学生");
student.setAttribute("性别", studentBean.getSex());
root.appendChild(student);

// Name element: the text node is filled in first, then the element is
// attached below the student
Element name = doc.createElement("姓名");
Text tName = doc.createTextNode(studentBean.getName());
name.appendChild(tName);
student.appendChild(name);

// Age element, built the same way; the age is converted to text
Element age = doc.createElement("年龄");
Text tAge = doc.createTextNode(String.valueOf(studentBean.getAge()));
age.appendChild(tAge);
student.appendChild(age);

2.SAX (Simple API for XML)

此方法主要由XML-DEV邮件列表的成员开发。SAX是基于事件的方法，它很类似于标签库的处理机制，在标签开始、结束以及错误发生等地方调用相应的接口实现方法，不需要把整个文档都读入内存。SAX具有性能优异、占用存储空间少的特点。但SAX的设计只考虑了功能的强大性，却没有考虑程序员使用起来是否方便。
使用时可以实现ContentHandler、ErrorHandler、DTDHandler等接口，其中ContentHandler是必须实现的（或者直接继承DefaultHandler）。

Java代码

import org.xml.sax.*;
/**
 * SAX content handler that announces the start and end of the document and
 * prints the text content of every {@code student} element.
 *
 * <p>All callbacks live inside the class body, and the character data printed
 * by {@link #endElement} is collected in {@link #characters}.
 */
public class MyContentHandler implements ContentHandler {

    /** Character data collected since the most recent start tag. */
    private final StringBuilder currentData = new StringBuilder();

    /**
     * Receives the locator used to find the position of later events.
     * This handler does not use it.
     *
     * @param locator locator supplied by the parser
     */
    @Override
    public void setDocumentLocator(Locator locator) {
    }

    /**
     * Called once when parsing of the document starts.
     *
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void startDocument() throws SAXException {
        System.out.println("** Student information start **");
    }

    /**
     * Called once when parsing of the document ends.
     *
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void endDocument() throws SAXException {
        System.out.println("**** Student information end ****");
    }

    /**
     * Called when a namespace prefix mapping comes into scope. Ignored.
     *
     * @param prefix namespace prefix being declared
     * @param uri namespace URI the prefix is mapped to
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
    }

    /**
     * Called when a namespace prefix mapping goes out of scope. Ignored.
     *
     * @param prefix namespace prefix that was mapped
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /**
     * Called at the start of every element; discards text collected so far so
     * that {@link #endElement} only sees text following this start tag.
     *
     * @param namespaceURI namespace URI of the element
     * @param localName local name, e.g. {@code student}
     * @param qName qualified name, e.g. {@code LIT:student}
     * @param atts attributes attached to the element (unused)
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
            throws SAXException {
        currentData.setLength(0);
    }

    /**
     * Called at the end of every element; prints {@code student:<text>} for
     * {@code student} elements, with surrounding whitespace trimmed.
     *
     * @param namespaceURI namespace URI of the element
     * @param localName local name, e.g. {@code student}
     * @param qName qualified name, e.g. {@code LIT:student}
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void endElement(String namespaceURI, String localName, String qName)
            throws SAXException {
        // Constant-first comparison also tolerates a null localName.
        if ("student".equals(localName)) {
            System.out.println(localName + ":" + currentData.toString().trim());
        }
    }

    /**
     * Receives element text. A parser may deliver one text run in several
     * chunks, so the chunks are appended rather than assigned.
     *
     * @param ch buffer holding the characters
     * @param start index of the first character in {@code ch}
     * @param length number of characters to read
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        currentData.append(ch, start, length);
    }

    /**
     * Receives ignorable whitespace in element content. Ignored.
     *
     * @param ch buffer holding the characters
     * @param start index of the first character in {@code ch}
     * @param length number of characters to read
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
    }

    /**
     * Called when a processing instruction is encountered. Ignored.
     *
     * @param target processing instruction target
     * @param data processing instruction data
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void processingInstruction(String target, String data) throws SAXException {
    }

    /**
     * Called when the parser skips an entity. Ignored.
     *
     * @param name name of the skipped entity
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void skippedEntity(String name) throws SAXException {
    }
}

import org.xml.sax.*;
/**
 * SAX content handler that announces the start and end of the document and
 * prints the text content of every {@code student} element.
 *
 * <p>All callbacks live inside the class body, and the character data printed
 * by {@link #endElement} is collected in {@link #characters}.
 */
public class MyContentHandler implements ContentHandler {

    /** Character data collected since the most recent start tag. */
    private final StringBuilder currentData = new StringBuilder();

    /**
     * Receives the locator used to find the position of later events.
     * This handler does not use it.
     *
     * @param locator locator supplied by the parser
     */
    @Override
    public void setDocumentLocator(Locator locator) {
    }

    /**
     * Called once when parsing of the document starts.
     *
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void startDocument() throws SAXException {
        System.out.println("** Student information start **");
    }

    /**
     * Called once when parsing of the document ends.
     *
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void endDocument() throws SAXException {
        System.out.println("**** Student information end ****");
    }

    /**
     * Called when a namespace prefix mapping comes into scope. Ignored.
     *
     * @param prefix namespace prefix being declared
     * @param uri namespace URI the prefix is mapped to
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
    }

    /**
     * Called when a namespace prefix mapping goes out of scope. Ignored.
     *
     * @param prefix namespace prefix that was mapped
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
    }

    /**
     * Called at the start of every element; discards text collected so far so
     * that {@link #endElement} only sees text following this start tag.
     *
     * @param namespaceURI namespace URI of the element
     * @param localName local name, e.g. {@code student}
     * @param qName qualified name, e.g. {@code LIT:student}
     * @param atts attributes attached to the element (unused)
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
            throws SAXException {
        currentData.setLength(0);
    }

    /**
     * Called at the end of every element; prints {@code student:<text>} for
     * {@code student} elements, with surrounding whitespace trimmed.
     *
     * @param namespaceURI namespace URI of the element
     * @param localName local name, e.g. {@code student}
     * @param qName qualified name, e.g. {@code LIT:student}
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void endElement(String namespaceURI, String localName, String qName)
            throws SAXException {
        // Constant-first comparison also tolerates a null localName.
        if ("student".equals(localName)) {
            System.out.println(localName + ":" + currentData.toString().trim());
        }
    }

    /**
     * Receives element text. A parser may deliver one text run in several
     * chunks, so the chunks are appended rather than assigned.
     *
     * @param ch buffer holding the characters
     * @param start index of the first character in {@code ch}
     * @param length number of characters to read
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        currentData.append(ch, start, length);
    }

    /**
     * Receives ignorable whitespace in element content. Ignored.
     *
     * @param ch buffer holding the characters
     * @param start index of the first character in {@code ch}
     * @param length number of characters to read
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
    }

    /**
     * Called when a processing instruction is encountered. Ignored.
     *
     * @param target processing instruction target
     * @param data processing instruction data
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void processingInstruction(String target, String data) throws SAXException {
    }

    /**
     * Called when the parser skips an entity. Ignored.
     *
     * @param name name of the skipped entity
     * @throws SAXException never thrown by this implementation
     */
    @Override
    public void skippedEntity(String name) throws SAXException {
    }
}

取得元素数据的方法——characters

取得元素数据中的空白的方法——ignorableWhitespace

在解析到处理指令时调用的方法——processingInstruction

当未验证解析器忽略实体时调用的方法——skippedEntity

运行时，只需要使用下列代码：

Java代码

// Create the parser wrapper and parse the student file.
// MySAXParser is not shown in this article; it is presumably the class that
// registers MyContentHandler with a SAX XMLReader — confirm against its source.
MySAXParser mySAXParser = new MySAXParser();
// The file name matches the "StuInfo.xml" document used by the other examples.
mySAXParser.parserXMLFile("StuInfo.xml");

// Create the parser wrapper and parse the student file.
// MySAXParser is not shown in this article; it is presumably the class that
// registers MyContentHandler with a SAX XMLReader — confirm against its source.
MySAXParser mySAXParser = new MySAXParser();
// The file name matches the "StuInfo.xml" document used by the other examples.
mySAXParser.parserXMLFile("StuInfo.xml");

3.JDOM

JDOM的处理方式有些类似于DOM，但它主要是用SAX实现的。JDOM用Java的数据类型来定义操作数据树的各个节点。JDOM的性能也很优越。

Java代码

import org.jdom.*;
import org.jdom.input.*;
import org.jdom.output.*;
// Reads, modifies and writes back a student document with JDOM.
// fileURI and elmtLesson are assumed to be defined by the surrounding code.
// NOTE(review): Entity and XMLOutputter(String, boolean) look like the pre-1.0
// JDOM API — confirm against the JDOM version actually on the classpath.

// Non-validating builder
SAXBuilder builder = new SAXBuilder(false);
// Build the Document
Document doc = builder.build(fileURI);
// Root element; every lookup below starts from it
Element elmtStuInfo = doc.getRootElement();
// Namespace; the URI must match the document's declaration exactly, so it
// carries no trailing whitespace
Namespace ns = Namespace.getNamespace("LIT", "http://www.lit.edu.cn/student/");

// Collect all LIT:student children of the root
List lstStudents = elmtStuInfo.getChildren("student", ns);
for (int i = 0; i < lstStudents.size(); i++) {
    Element elmtStudent = (Element) lstStudents.get(i);
    // Trimmed text of the student's name child
    elmtStudent.getChildTextTrim("name", ns);
}

// Modify
elmtLesson.getChild("lessonScore", ns).setText("100");

// Remove
elmtStuInfo.removeChild("master", ns);

// Add
elmtStuInfo.addContent(new Element("master", ns).addContent(new Entity("masterName")));

// Write the document.
// First argument: the indent string, here four spaces.
// Second argument: true, meaning line breaks are emitted.
XMLOutputter printDoc = new XMLOutputter("    ", true);
FileOutputStream out = new FileOutputStream("StuInfo.xml");
try {
    printDoc.output(doc, out);
} finally {
    // Always release the file handle, even if writing fails
    out.close();
}

import org.jdom.*;
import org.jdom.input.*;
import org.jdom.output.*;
// Reads, modifies and writes back a student document with JDOM.
// fileURI and elmtLesson are assumed to be defined by the surrounding code.
// NOTE(review): Entity and XMLOutputter(String, boolean) look like the pre-1.0
// JDOM API — confirm against the JDOM version actually on the classpath.

// Non-validating builder
SAXBuilder builder = new SAXBuilder(false);
// Build the Document
Document doc = builder.build(fileURI);
// Root element; every lookup below starts from it
Element elmtStuInfo = doc.getRootElement();
// Namespace; the URI must match the document's declaration exactly, so it
// carries no trailing whitespace
Namespace ns = Namespace.getNamespace("LIT", "http://www.lit.edu.cn/student/");

// Collect all LIT:student children of the root
List lstStudents = elmtStuInfo.getChildren("student", ns);
for (int i = 0; i < lstStudents.size(); i++) {
    Element elmtStudent = (Element) lstStudents.get(i);
    // Trimmed text of the student's name child
    elmtStudent.getChildTextTrim("name", ns);
}

// Modify
elmtLesson.getChild("lessonScore", ns).setText("100");

// Remove
elmtStuInfo.removeChild("master", ns);

// Add
elmtStuInfo.addContent(new Element("master", ns).addContent(new Entity("masterName")));

// Write the document.
// First argument: the indent string, here four spaces.
// Second argument: true, meaning line breaks are emitted.
XMLOutputter printDoc = new XMLOutputter("    ", true);
FileOutputStream out = new FileOutputStream("StuInfo.xml");
try {
    printDoc.output(doc, out);
} finally {
    // Always release the file handle, even if writing fails
    out.close();
}

4.JAXB (Java Architecture for XML Binding)

JAXB 是以SUN为主的一些公司公布的。JAXB将schema（或者DTD）映射为java对象（.java文件），然后使用这些java对象来解析xml文件。需要使用之前生成java文件，因而要有固定的schema，无法处理动态的xml文件。
首先使用xjc命令，生成java文件

xjc [-options ...]
(生成的文件较多)

Java代码

// Unmarshals books.xml into the classes generated by xjc and prints each book.
// "packageName" stands for the package that holds the generated classes.
JAXBContext jc = JAXBContext.newInstance("packageName");
Unmarshaller unmarshaller = jc.createUnmarshaller();
// NOTE(review): Collection is presumably the xjc-generated root class, not
// java.util.Collection — confirm against the generated sources.
Collection collection = (Collection) unmarshaller.unmarshal(new File("books.xml"));
CollectionType.BooksType booksType = collection.getBooks();
List bookList = booksType.getBook();
for (int i = 0; i < bookList.size(); i++) {
    test.jaxb.BookType book = (test.jaxb.BookType) bookList.get(i);
    System.out.println("Book Name: " + book.getName().trim());
    System.out.println("Book ISBN: " + book.getISBN());
}

// Unmarshals books.xml into the classes generated by xjc and prints each book.
// "packageName" stands for the package that holds the generated classes.
JAXBContext jc = JAXBContext.newInstance("packageName");
Unmarshaller unmarshaller = jc.createUnmarshaller();
// NOTE(review): Collection is presumably the xjc-generated root class, not
// java.util.Collection — confirm against the generated sources.
Collection collection = (Collection) unmarshaller.unmarshal(new File("books.xml"));
CollectionType.BooksType booksType = collection.getBooks();
List bookList = booksType.getBook();
for (int i = 0; i < bookList.size(); i++) {
    test.jaxb.BookType book = (test.jaxb.BookType) bookList.get(i);
    System.out.println("Book Name: " + book.getName().trim());
    System.out.println("Book ISBN: " + book.getISBN());
}

补充另一种方法：

据悉dom4j在xml解析方面是性能最好的，hibernate等框架都使用它作为解析的工具。

要使用dom4j读写XML文档,需要先下载dom4j包,dom4j官方网站在 http://www.dom4j.org/

目前最新dom4j包下载地址:http://nchc.dl.sourceforge.net/sourceforge/dom4j/dom4j-1.6.1.zip

解开后有两个包,仅操作XML文档的话把dom4j-1.6.1.jar加入工程就可以了,如果需要使用XPath的话还需要加入包jaxen-1.1-beta-7.jar

写了简单的dom4j使用demo，以备回忆，有些是根据dom4j文档里的例子改编的

使用dom4j解析下面的xml文件。

Xml代码

<?xml
version="1.0"
encoding="GB2312"?>

<?xml-stylesheet
type="text/xsl"
href="students.xsl"?>

<students>

    <student
sn="01">

        <name>张三</name>

        <age>18</age>

    </student>


    <student
sn="02">

        <name>李四</name>

        <age>20</age>

    </student>

</students>

<?xml version="1.0" encoding="GB2312"?>

<?xml-stylesheet type="text/xsl" href="students.xsl"?>

<students>
<student sn="01">
<name>张三</name>
<age>18</age>
</student>

<student sn="02">
<name>李四</name>
<age>20</age>
</student>
</students>

Parse.java

Java代码

import java.io.File;

import org.dom4j.Attribute;

import org.dom4j.Document;
import org.dom4j.DocumentException;

import org.dom4j.Element;
import org.dom4j.ProcessingInstruction;

import org.dom4j.VisitorSupport;

import org.dom4j.io.SAXReader;

/**
 * Demo that reads {@code src/students.xml} with dom4j and prints its
 * attributes, elements and processing instructions through a visitor.
 */
public class Parse {

    /**
     * Parses the sample file and walks the resulting document with
     * {@link MyVistor}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SAXReader reader = new SAXReader();
        File file = new File("src/students.xml");
        try {
            Document doc = reader.read(file);
            doc.accept(new MyVistor());
        } catch (DocumentException e) {
            // Name the file that failed so the stack trace has context
            System.err.println("Failed to parse " + file.getPath() + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    /** Visitor that prints one line per attribute, element and processing instruction. */
    public static class MyVistor extends VisitorSupport {

        /**
         * Prints the attribute as {@code name=value}.
         *
         * @param node attribute being visited
         */
        @Override
        public void visit(Attribute node) {
            System.out.println("Attribute:---" + node.getName() + "=" + node.getValue());
        }

        /**
         * Prints {@code name=text} for elements that contain only text, and a
         * separator line carrying the element name otherwise.
         *
         * @param node element being visited
         */
        @Override
        public void visit(Element node) {
            if (node.isTextOnly()) {
                System.out.println("Element:---" + node.getName() + "=" + node.getText());
            } else {
                System.out.println("--------" + node.getName() + "-------");
            }
        }

        /**
         * Prints the target and text of a processing instruction.
         *
         * @param node processing instruction being visited
         */
        @Override
        public void visit(ProcessingInstruction node) {
            System.out.println("PI:" + node.getTarget() + " " + node.getText());
        }
    }
}

import java.io.File;

import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.ProcessingInstruction;
import org.dom4j.VisitorSupport;
import org.dom4j.io.SAXReader;

/**
 * Demo that reads {@code src/students.xml} with dom4j and prints its
 * attributes, elements and processing instructions through a visitor.
 */
public class Parse {

    /**
     * Parses the sample file and walks the resulting document with
     * {@link MyVistor}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SAXReader reader = new SAXReader();
        File file = new File("src/students.xml");
        try {
            Document doc = reader.read(file);
            doc.accept(new MyVistor());
        } catch (DocumentException e) {
            // Name the file that failed so the stack trace has context
            System.err.println("Failed to parse " + file.getPath() + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    /** Visitor that prints one line per attribute, element and processing instruction. */
    public static class MyVistor extends VisitorSupport {

        /**
         * Prints the attribute as {@code name=value}.
         *
         * @param node attribute being visited
         */
        @Override
        public void visit(Attribute node) {
            System.out.println("Attribute:---" + node.getName() + "=" + node.getValue());
        }

        /**
         * Prints {@code name=text} for elements that contain only text, and a
         * separator line carrying the element name otherwise.
         *
         * @param node element being visited
         */
        @Override
        public void visit(Element node) {
            if (node.isTextOnly()) {
                System.out.println("Element:---" + node.getName() + "=" + node.getText());
            } else {
                System.out.println("--------" + node.getName() + "-------");
            }
        }

        /**
         * Prints the target and text of a processing instruction.
         *
         * @param node processing instruction being visited
         */
        @Override
        public void visit(ProcessingInstruction node) {
            System.out.println("PI:" + node.getTarget() + " " + node.getText());
        }
    }
}

使用dom4j来将属性写入xml

Java代码

import java.io.FileWriter;
import java.io.IOException;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;

import org.dom4j.Element;
import org.dom4j.io.OutputFormat;

import org.dom4j.io.XMLWriter;

/**
 * Demo that builds a small author document with dom4j, writes it to
 * {@code src/author.xml} and pretty-prints it to the console.
 */
public class DWriter {

    /**
     * Writes the sample document to disk and then to {@code System.out}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Document doc = createDoc();
        try {
            // Hand XMLWriter a byte stream rather than a FileWriter, so that the
            // bytes are encoded by XMLWriter itself and agree with the encoding
            // named in the XML declaration. A FileWriter would use the platform
            // default charset instead. The class name is fully qualified so the
            // existing import list stays unchanged.
            XMLWriter writer = new XMLWriter(new java.io.FileOutputStream("src/author.xml"));
            try {
                writer.write(doc);
            } finally {
                // Release the file handle even when writing fails
                writer.close();
            }

            // Pretty print the document to System.out
            OutputFormat format = OutputFormat.createPrettyPrint();
            XMLWriter console = new XMLWriter(System.out, format);
            console.write(doc);
            // Flush but do not close: closing would also close System.out
            console.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds a document with a {@code root} element holding two {@code author}
     * elements, each with {@code name} and {@code location} attributes and a
     * text body.
     *
     * @return the newly created document
     */
    public static Document createDoc() {
        Document doc = DocumentHelper.createDocument();
        Element root = doc.addElement("root");

        root.addElement("author")
                .addAttribute("name", "Kree")
                .addAttribute("location", "UK")
                .addText("Kree Strachan");

        root.addElement("author")
                .addAttribute("name", "King")
                .addAttribute("location", "US")
                .addText("King McWrirter");

        return doc;
    }
}

import java.io.FileWriter;
import java.io.IOException;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

/**
 * Demo that builds a small author document with dom4j, writes it to
 * {@code src/author.xml} and pretty-prints it to the console.
 */
public class DWriter {

    /**
     * Writes the sample document to disk and then to {@code System.out}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Document doc = createDoc();
        try {
            // Hand XMLWriter a byte stream rather than a FileWriter, so that the
            // bytes are encoded by XMLWriter itself and agree with the encoding
            // named in the XML declaration. A FileWriter would use the platform
            // default charset instead. The class name is fully qualified so the
            // existing import list stays unchanged.
            XMLWriter writer = new XMLWriter(new java.io.FileOutputStream("src/author.xml"));
            try {
                writer.write(doc);
            } finally {
                // Release the file handle even when writing fails
                writer.close();
            }

            // Pretty print the document to System.out
            OutputFormat format = OutputFormat.createPrettyPrint();
            XMLWriter console = new XMLWriter(System.out, format);
            console.write(doc);
            // Flush but do not close: closing would also close System.out
            console.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds a document with a {@code root} element holding two {@code author}
     * elements, each with {@code name} and {@code location} attributes and a
     * text body.
     *
     * @return the newly created document
     */
    public static Document createDoc() {
        Document doc = DocumentHelper.createDocument();
        Element root = doc.addElement("root");

        root.addElement("author")
                .addAttribute("name", "Kree")
                .addAttribute("location", "UK")
                .addText("Kree Strachan");

        root.addElement("author")
                .addAttribute("name", "King")
                .addAttribute("location", "US")
                .addText("King McWrirter");

        return doc;
    }
}

使用dom4j写入到author.xml文件的内容

Xml代码

<?xml version="1.0" encoding="UTF-8"?>

<root>
<author name="Kree" location="UK">Kree Strachan</author>

<author name="King" location="US">King McWrirter</author>

</root>

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： XML

相关文章推荐

新的分享

章节导航