您的位置:首页 > 运维架构

XML解析,sax实现详细解说

2010-09-17 16:44 239 查看
这次的xml解析与上次的html解析是同一类功能:批量解析多个xml。我想大家对于sax解析也不陌生了,它是以事件回调的方式来工作的(解析器每遇到开始标签、文本、结束标签,就回调处理器中对应的方法),工作原理也比较复杂。同样,废话少说,马上进入正题。

sax实现批量xml解析。上次忘记说实现思路了:所谓的批量操作,我的实现是为每个站点写一个属性文件(配置该站点xml中各节点的名称),然后由一个主配置文件统一驱动。所谓的主文件,就是把所有配置文件的名称(不含扩展名)写到主文件里,后续我再介绍一次。

 

代码演示:

package com.tz.xml;

import java.util.ArrayList;

import java.util.List;

import java.util.Properties;

import org.apache.commons.lang.StringUtils;

import org.xml.sax.Attributes;

import org.xml.sax.SAXException;

import org.xml.sax.SAXParseException;

import org.xml.sax.helpers.DefaultHandler;

import com.tz.tools.PropertiesTools;

import com.tz.tools.SaveImage;

/**

 * XML解析

 *

 * @author Sunweikun

 *

 */

public class XMLResolve extends DefaultHandler {

    //这一堆的变量统一解释下,tagValue是主要操作变量,pts是读取配置文件类 saveImg是下载文件类,下面的List保存取到的节点值

    private StringBuffer tagValue =new StringBuffer();

    private PropertiesTools pts;

    //private SaveImage saveImg;

    private List<String> imagesList=new ArrayList<String>();

    private List<String> priceList=new ArrayList<String>();

    private List<String> valueList=new ArrayList<String>();

    private List<String> titleList=new ArrayList<String>();

    private List<String> numberList=new ArrayList<String>();

    private List<String> cityList=new ArrayList<String>();

    private List<String> urlList=new ArrayList<String>();

    public XMLResolve() {

    }

 

    /**

     * 开始解析XML文件

     */

    public void startDocument() throws SAXException {

        // 可以在此初始化变量等操作

      //  System.out.println("~~~~解析文档开始~~~");

    }

    /**

     * 结束解析XML文件

     */

    public void endDocument() throws SAXException {

       // System.out.println("~~~~解析文档结束~~~");

    }

    /**

     * 在遇到结束标签时调用此方法

     */

    public void endElement(String uri, String localName, String qName)throws SAXException {

        String title = null, sold = null, city = null, url = null,

        price = null, value = null, img = null;

        for (int i = 0; i < this.pts.getXMLURL().length; i++) {

            Properties p = this.pts.getProperties(this.pts.getXMLURL()[i],"xml");

            // 获取标题

            if (p.getProperty("title").equals(qName)) {

                title = tagValue.toString().trim();

            }

            // 获取购买人数

            if (p.getProperty("quantity_sold").equals(qName)) {

                sold = tagValue.toString().trim();

            }

            // 获取城市名称

            if (p.getProperty("division_name").equals(qName)) {

                city = tagValue.toString().trim();

            }

            // 获取团购地址

            if (p.getProperty("deal_url").equals(qName)) {

                url = tagValue.toString().trim();

            }

            // 获取现在价格

            if (p.getProperty("price").equals(qName)) {

                price = tagValue.toString().trim();

            }

            // 获取原来价格

            if (p.getProperty("value").equals(qName)) {

                value = tagValue.toString().trim();

            }

            // 获取图片地址

            if (p.getProperty("image_Url").equalsIgnoreCase(qName)) {

                img = tagValue.toString().trim();

            }

        }

        if (title != null){

            titleList.add(title);

        }

        if (sold != null){

            numberList.add(sold);

        }

        if (city != null){

           cityList.add(city);

        }

        if (url != null) {

            if (!StringUtils.isNumeric(url)) {

                urlList.add(url);

            }

        }

        if (price != null){

            priceList.add(price);

        }

        if (value != null){

            valueList.add(value);

        }

        if (img != null) {

            imagesList.add(img);

        }

    }

    /**

     * 所有的XML文件中的字符会放到ch[]中

     */

    public void characters(char ch[], int start, int length)throws SAXException {

        tagValue.append(ch, start, length);

    }

    /**

     * 错误的解析通知

     */

    public void error(SAXParseException e) throws SAXException {

    }

    /**

     * 在开始是要取的属性

     */

    public void startElement(String uri, String localName, String qName,Attributes attributes) throws SAXException {

       //初始化

      //  tagValue = new StringBuffer();

        //每一次获取标签是清空缓冲

        tagValue.delete(0, tagValue.length());

        super.startElement(uri, localName, qName, attributes);

    }

    public void setPts(PropertiesTools pts) {

        this.pts = pts;

    }

    public void setSaveImg(SaveImage saveImg) {

        this.saveImg = saveImg;

    }

    public List<String> getImagesList() {

        return imagesList;

    }

    public void setImagesList(List<String> imagesList) {

        this.imagesList = imagesList;

    }

    public List<String> getPriceList() {

        return priceList;

    }

    public void setPriceList(List<String> priceList) {

        this.priceList = priceList;

    }

    public List<String> getValueList() {

        return valueList;

    }

    public void setValueList(List<String> valueList) {

        this.valueList = valueList;

    }

    public List<String> getTitleList() {

        return titleList;

    }

    public void setTitleList(List<String> titleList) {

        this.titleList = titleList;

    }

    public List<String> getNumberList() {

        return numberList;

    }

    public void setNumberList(List<String> numberList) {

        this.numberList = numberList;

    }

    public List<String> getCityList() {

        return cityList;

    }

    public void setCityList(List<String> cityList) {

        this.cityList = cityList;

    }

    public List<String> getUrlList() {

        return urlList;

    }

    public void setUrlList(List<String> urlList) {

        this.urlList = urlList;

    }

}

--------------------------------------------------------------

下面是执行方法

--------------------------------------------------------------

package com.tz.xml;

import java.io.IOException;

import java.io.InputStream;

import java.net.URL;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.parsers.SAXParser;

import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.SAXException;

import com.tz.tools.PropertiesTools;

/**

 * 操作类

 * @author Sunweikun

 *

 */

public class SendRE {

    public PropertiesTools pts;

    public XMLResolve xmlResolve;

    /**

     * 完成的最后操作

     */

    public void sendRE() throws IOException, ParserConfigurationException, SAXException {

        for (int i = 0; i < this.pts.getXMLURL().length; i++) {

            URL url = new URL(this.pts.getProperties(this.pts.getXMLURL()[i],"xml").getProperty("url"));

            InputStream input = url.openStream();

            SAXParserFactory factory = SAXParserFactory.newInstance();

            factory.setNamespaceAware(false);

            SAXParser parser = factory.newSAXParser();

            parser.parse(input, this.xmlResolve);

        }

      

    }

    public void setPts(PropertiesTools pts) {

        this.pts = pts;

    }

    public void setXmlResolve(XMLResolve xmlResolve) {

        this.xmlResolve = xmlResolve;

    }

}

------------------------------------

配置文件 xml/24juan.properties(文件名须与主配置文件 xmlName 中的名称一致,并放在 xml 目录下)

------------------------------------

#URL要抓取的API

url=http://www.24quan.com/api/alliance.php

#该团的URL节点

deal_url=siteurl

#购买的人数节点

quantity_sold=bought

#城市节点

division_name=city

#标题节点

title=title

#图片地址节点

image_Url=image

#现在价格节点

price=price

#原来价格节点

value=value

#图片输出路径

image_path=D://MyEclipse 8.5//tz//24juan//

-------------------------------------------------------------

主配置文件 config.properties

-------------------------------------------------------------

#需要解析的xml配置文件名称(不含扩展名),多个名称以逗号分隔。可用名称:24juan,aibang,didatuan,ftuan,lashou,meituan,pintuan,sohu,tuanbao,tuanku,wowo

xmlName=24juan,aibang,didatuan,ftuan,lashou,meituan,pintuan,sohu,tuanku,tuanbao,wowo

#写入新加的配置对html操作

htmlName=nuomi,xinlang

----------------------------------------------------------------

配置文件读写类 PropertiesTools

----------------------------------------------------------------

package com.tz.tools;

import java.io.BufferedInputStream;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStream;

import java.util.ArrayList;

import java.util.HashSet;

import java.util.Iterator;

import java.util.List;

import java.util.Properties;

import java.util.Set;

import org.apache.commons.lang.StringUtils;

/**

 * 读文件

 *

 * @author Sunweikun

 *

 */

public class PropertiesTools {

    /**

     * 读取properties配置文件

     *

     * @param path

     * @param manner 方式 xml 或者 html

     * @return

     */

    public Properties getProperties(String path,String manner) {

        Properties p = new Properties();

        try {

            InputStream in = new BufferedInputStream(new FileInputStream(manner+"/"+path

                    + ".properties"));

            p.load(in);

        } catch (IOException e) {

            // TODO Auto-generated catch block

            e.printStackTrace();

        }

        return p;

    }

    /**

     * 动态读取xml的配置文件

     *

     * @return

     */

    public String[] getXMLURL() {

        String[] paths = null;

        Properties p = new Properties();

        try {

            InputStream in = new BufferedInputStream(new FileInputStream("config.properties"));

            p.load(in);

        } catch (IOException e) {

            // TODO Auto-generated catch block

            e.printStackTrace();

        }

        paths = p.getProperty("xmlName").split(",");

        return paths;

    }

    /**

     * 动态读取html的配置文件

     * @return

     */

    public String[] getHTMLURL() {

        String[] paths = null;

        Properties p = new Properties();

        try {

            InputStream in = new BufferedInputStream(new FileInputStream("config.properties"));

            p.load(in);

        } catch (IOException e) {

            // TODO Auto-generated catch block

            e.printStackTrace();

        }

        paths = p.getProperty("htmlName").split(",");

        return paths;

    }

    /**

     * 去掉左右空格后字符串是否为空

     * @param astr String

     * @return boolean

     */

    public static boolean isTrimEmpty(String astr)

    {

        if ((null == astr) || (astr.length() == 0))

        {

            return true;

        }

        if (StringUtils.isBlank(astr.trim()))

        {

            return true;

        }

        return false;

    }

    /**

     * 去掉重复的List

     * @param list

     */

    @SuppressWarnings("unchecked")

    public static void removeDuplicateWithOrder(List<String> list)   

    {   

        Set set = new HashSet();   

        List newList = new ArrayList();   

        for (Iterator iter = list.iterator(); iter.hasNext();){   

            Object element = iter.next();   

            if (set.add(element)) newList.add(element);   

        }   

        list.clear();   

        list.addAll(newList);   

    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息