您的位置:首页 > 其它

XML解析(DOM,SAX,dom4j)

2014-12-23 13:57 309 查看
package com.ztt;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.io.DOMReader;
import org.dom4j.io.SAXReader;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Demo that downloads an Atom news feed over HTTP and prints the title and
 * link of every {@code entry} element, using four different XML APIs:
 * JAXP DOM, JAXP SAX, dom4j on top of SAX, and dom4j on top of a W3C DOM tree.
 */
public class HttpConnectionXML
{
	/** Feed endpoint (cnblogs open API) that returns the most recent news entries. */
	private static final String FEED_URL = "http://wcf.open.cnblogs.com/news/recent/5";

	/**
	 * Parser feature that rejects any DOCTYPE declaration. The XML comes from
	 * the network, so DTD processing is switched off to rule out XXE attacks.
	 */
	private static final String DISALLOW_DOCTYPE = "http://apache.org/xml/features/disallow-doctype-decl";

	static StringBuffer result = new StringBuffer();

	/**
	 * Entry point: runs the download-and-parse demo.
	 *
	 * @param args unused
	 */
	public static void main(String[] args)
	{
		httpConnection();
	}

	/**
	 * Fetches the feed and hands it to the dom4j SAX and dom4j DOM parsers.
	 * <p>
	 * The response body is read into memory once and every parser receives its
	 * own stream over those bytes; a network stream can only be consumed once,
	 * so sharing it would leave the second parser with nothing to read.
	 * {@link #dom(InputStream)} and {@link #sax(InputStream)} can be invoked
	 * the same way. I/O failures are reported on standard error.
	 */
	public static void httpConnection()
	{
		try
		{
			URL url = new URL(FEED_URL);
			URLConnection connection = url.openConnection();
			// Generic request headers (accepted formats, user agent, ...);
			// defaults apply to anything not set here.
			connection.setRequestProperty("accept", "*/*");
			connection.setRequestProperty("connection", "Keep-Alive");
			connection.setRequestProperty("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
			// Open the connection.
			connection.connect();

			byte[] xml;
			try (InputStream inputStream = connection.getInputStream())
			{
				xml = readFully(inputStream);
			}

			// dom4j on top of SAX
			dom4jSAX(new ByteArrayInputStream(xml));
			// dom4j on top of a W3C DOM tree
			dom4jDOM(new ByteArrayInputStream(xml));
		} catch (MalformedURLException e)
		{
			e.printStackTrace();
		} catch (IOException e)
		{
			e.printStackTrace();
		}
	}

	/**
	 * Reads the given stream to its end and returns everything that was read.
	 *
	 * @param in stream to drain; the caller remains responsible for closing it
	 * @return all bytes read from {@code in}
	 * @throws IOException if reading fails
	 */
	private static byte[] readFully(InputStream in) throws IOException
	{
		ByteArrayOutputStream buffer = new ByteArrayOutputStream();
		byte[] chunk = new byte[8192];
		int read;
		while ((read = in.read(chunk)) != -1)
		{
			buffer.write(chunk, 0, read);
		}
		return buffer.toByteArray();
	}

	/**
	 * Parses the feed with JAXP DOM and prints each entry's title text and
	 * link {@code href}. Parse failures are reported on standard error.
	 *
	 * @param in stream positioned at the start of the XML document
	 */
	public static void dom(InputStream in)
	{
		try
		{
			// 1. Create the document builder factory (DTDs disabled).
			DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
			documentBuilderFactory.setFeature(DISALLOW_DOCTYPE, true);
			// 2. Create the parser.
			DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
			// 3. Build the document tree.
			Document document = documentBuilder.parse(in);
			// 4. Locate all "entry" elements.
			NodeList nodeList = document.getElementsByTagName("entry");
			// 5. Walk the entries and print their data.
			for (int i = 0; i < nodeList.getLength(); i++)
			{
				Element entry = (Element) nodeList.item(i);
				// getElementsByTagName only yields elements, so the casts are safe;
				// item(0) is null when the child is missing.
				Element title = (Element) entry.getElementsByTagName("title").item(0);
				if (title != null)
				{
					System.out.println("TITLE-----" + title.getTextContent());
				}
				Element link = (Element) entry.getElementsByTagName("link").item(0);
				if (link != null)
				{
					System.out.println("LINK------" + link.getAttribute("href"));
				}
			}
		} catch (ParserConfigurationException e)
		{
			e.printStackTrace();
		} catch (SAXException e)
		{
			e.printStackTrace();
		} catch (IOException e)
		{
			e.printStackTrace();
		}
	}

	/**
	 * Parses the feed with JAXP SAX, delegating all output to a
	 * {@code MyHandler} instance. Parse failures are reported on standard error.
	 *
	 * @param in stream positioned at the start of the XML document
	 */
	public static void sax(InputStream in)
	{
		try
		{
			// SAX parser factory (DTDs disabled).
			SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
			saxParserFactory.setFeature(DISALLOW_DOCTYPE, true);
			SAXParser saxParser = saxParserFactory.newSAXParser();
			// Custom event handler that prints titles and links.
			MyHandler myHandler = new MyHandler();

			saxParser.parse(in, myHandler);
		} catch (ParserConfigurationException e)
		{
			e.printStackTrace();
		} catch (SAXException e)
		{
			e.printStackTrace();
		} catch (IOException e)
		{
			e.printStackTrace();
		}
	}

	/**
	 * Parses the feed with dom4j's {@code SAXReader} and prints, for every
	 * {@code entry} directly under the root, the title text and the link's
	 * {@code href}. Parse failures are reported on standard error.
	 *
	 * @param in stream positioned at the start of the XML document
	 */
	public static void dom4jSAX(InputStream in)
	{
		try
		{
			SAXReader saxReader = new SAXReader();
			saxReader.setFeature(DISALLOW_DOCTYPE, true);
			org.dom4j.Document document = saxReader.read(in);
			// Root (level-1) element.
			org.dom4j.Element rootNode = document.getRootElement();
			// Level-2 elements directly below the root.
			List<org.dom4j.Element> rootList = rootNode.elements();
			for (org.dom4j.Element child : rootList)
			{
				if (!"entry".equals(child.getName()))
				{
					continue;
				}
				// Level-3 elements inside this entry.
				List<org.dom4j.Element> entryList = child.elements();
				for (org.dom4j.Element field : entryList)
				{
					switch (field.getName())
					{
					case "title": // element text
						System.out.println(field.getText());
						break;
					case "link": // attribute, looked up by name rather than by position
						String href = field.attributeValue("href");
						if (href != null)
						{
							System.out.println(href);
						}
						break;
					default:
						break;
					}
				}
			}
		} catch (DocumentException e)
		{
			e.printStackTrace();
		} catch (SAXException e)
		{
			e.printStackTrace();
		}
	}

	/**
	 * Parses the feed into a W3C DOM tree, converts it to a dom4j document
	 * with {@code DOMReader}, and prints each entry's trimmed title text and
	 * link {@code href}. Parse failures are reported on standard error.
	 *
	 * @param in stream positioned at the start of the XML document
	 */
	public static void dom4jDOM(InputStream in)
	{
		try
		{
			DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
			documentBuilderFactory.setFeature(DISALLOW_DOCTYPE, true);
			DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
			org.w3c.dom.Document domDocument = documentBuilder.parse(in);
			// DOMReader takes an org.w3c.dom.Document instance.
			DOMReader domReader = new DOMReader();
			org.dom4j.Document document = domReader.read(domDocument);
			org.dom4j.Element rootElement = document.getRootElement();
			// Iterator over the level-2 "entry" elements.
			Iterator<org.dom4j.Element> iterator = rootElement.elementIterator("entry");
			while (iterator.hasNext())
			{
				org.dom4j.Element element = iterator.next();
				// Element text.
				System.out.println(element.elementTextTrim("title"));
				// Attribute value; an entry without a usable link is skipped.
				org.dom4j.Element link = element.element("link");
				String href = (link == null) ? null : link.attributeValue("href");
				if (href != null)
				{
					System.out.println(href);
				}
			}
		} catch (ParserConfigurationException e)
		{
			e.printStackTrace();
		} catch (SAXException e)
		{
			e.printStackTrace();
		} catch (IOException e)
		{
			e.printStackTrace();
		}
	}
}
/**
 * SAX event handler that prints the text of every {@code title} element and
 * the {@code href} attribute of every {@code link} element, framed by a
 * separator line at the start and at the end of the document.
 */
public class MyHandler extends DefaultHandler
{
	/** Text collected so far for the title element currently being read. */
	private final StringBuilder titleBuffer = new StringBuilder();
	/** True while the parser is between a title's start tag and its end tag. */
	private boolean inTitle;
	/** Text of the most recently completed title element. */
	private String titleStr;
	/** The href value of the most recently seen link element, or null if it had none. */
	private String link_href;

	/** Prints the opening separator line. */
	@Override
	public void startDocument() throws SAXException
	{
		super.startDocument();
		System.out.println("------------------------------------------");
	}

	/** Prints the closing separator line. */
	@Override
	public void endDocument() throws SAXException
	{
		super.endDocument();
		System.out.println("------------------------------------------");
	}

	/**
	 * Starts collecting text when a title opens, and prints the href of a link.
	 * The attribute is looked up by name because its position inside the tag
	 * is not guaranteed.
	 */
	@Override
	public void startElement(String uri, String localName, String qName,
			Attributes attributes) throws SAXException
	{
		super.startElement(uri, localName, qName, attributes);
		switch (qName)
		{
		case "title":
			inTitle = true;
			titleBuffer.setLength(0);
			break;
		case "link":
			link_href = attributes.getValue("href");
			if (link_href != null)
			{
				System.out.println(link_href);
			}
			break;
		default:
			break;
		}
	}

	/**
	 * Prints the collected text when a title closes. The decision is based on
	 * the element that is actually ending, so closing an enclosing element
	 * never prints the title a second time.
	 */
	@Override
	public void endElement(String uri, String localName, String qName)
			throws SAXException
	{
		super.endElement(uri, localName, qName);
		if ("title".equals(qName))
		{
			inTitle = false;
			titleStr = titleBuffer.toString();
			System.out.println(titleStr);
		}
	}

	/**
	 * Appends character data that belongs to the current title. A parser may
	 * deliver one text node in several calls (for example around entity
	 * references), so the chunks are accumulated instead of overwritten.
	 */
	@Override
	public void characters(char[] ch, int start, int length)
			throws SAXException
	{
		super.characters(ch, start, length);
		if (inTitle)
		{
			titleBuffer.append(ch, start, length);
		}
	}
}

                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: