您的位置:首页 > 其它

IT资讯--------抓取各个博客上的文章

2016-03-02 15:37 267 查看
该App已经上传到百度应用市场:http://shouji.baidu.com/soft/item?docid=8928185&from=as&f=search_app_it%E8%B5%84%E8%AE%AF%40list_1_image%402%40header_all_input

有兴趣的可以下载看看。接下来我会公布源代码,不过该App并没有使用代码混淆,所以也可以通过反编译清楚地看到源码。

一 CSDN 的博客文章:

1.文章的信息:

/**
 * Mutable data holder for one article entry shown in a news list.
 * Every field starts at its Java default (0 / null) and is filled through the setters.
 */
public class NewsItem {
    /** Numeric identifier. */
    private int id;
    /** Type code of the entry. */
    private int newsType;
    /** Headline. */
    private String title;
    /** Link to the article. */
    private String link;
    /** Publication time, stored as text. */
    private String date;
    /** Link to the picture. */
    private String picLink;
    /** Content text. */
    private String content;

    public NewsItem() {
    }

    // ---- identifier and type ----

    public int getId() {
        return this.id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public int getNewsType() {
        return this.newsType;
    }

    public void setNewsType(int newsType) {
        this.newsType = newsType;
    }

    // ---- textual fields ----

    public String getTitle() {
        return this.title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getLink() {
        return this.link;
    }

    public void setLink(String link) {
        this.link = link;
    }

    public String getDate() {
        return this.date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    public String getPicLink() {
        return this.picLink;
    }

    public void setPicLink(String picLink) {
        this.picLink = picLink;
    }

    public String getContent() {
        return this.content;
    }

    public void setContent(String content) {
        this.content = content;
    }
}


抓取文章的业务类:

**
* Created by Administrator on 2015/11/13.
* 处理NewItem的业务类
*/
public class NewsItemBiz {

public List<NewsItem> getNewsItems(int newTypes, int currentPage) throws CommonExecption {
String urlStr = URLUtil.generateUrl(newTypes, currentPage);
String htmlStr = DataUtil.doGet(urlStr, "UTF-8");

List<NewsItem> newsItems = new ArrayList<NewsItem>();
NewsItem newsItem = null;
Document doc = Jsoup.parse(htmlStr);
Elements units = doc.getElementsByClass("unit");
for (int i = 0; i < units.size(); i++) {
newsItem = new NewsItem();
newsItem.setNewsType(newTypes);

Element unit_ele = units.get(i);
Element h1_ele = unit_ele.getElementsByTag("h1").get(0);
Element h1_a_ele = h1_ele.child(0);
String title = h1_a_ele.text();
title = AppUtil.encoding(title,"utf-8");
String hred = h1_a_ele.attr("href");
hred = AppUtil.encoding(hred,"utf-8");
newsItem.setLink(hred);
newsItem.setTitle(title);

Element h4_ele = unit_ele.getElementsByTag("h4").get(0);
Element ago_ele = h4_ele.getElementsByClass("ago").get(0);
String date = ago_ele.text();
date = AppUtil.encoding(date,"utf-8");
newsItem.setDate(date);

Element d1_ele = unit_ele.getElementsByTag("dl").get(0);
Element dt_ele = d1_ele.child(0);

try {
Element img_ele = dt_ele.child(0);
String imgLink = img_ele.child(0).attr("src");
//    System.out.println("link--------"+imgLink);
imgLink = AppUtil.encoding(imgLink,"utf-8");
newsItem.setPicLink(imgLink);
} catch (IndexOutOfBoundsException e) {
e.printStackTrace();
}
Element content_ele = d1_ele.child(1);
String content = content_ele.text();
content = AppUtil.encoding(content,"utf-8");
newsItem.setContent(content);
newsItems.add(newsItem);
}
return newsItems;
}

public NewsDto getNews(String urlStr) throws CommonExecption {
NewsDto newsDto = new NewsDto();
List<News> newses = new ArrayList<>();
String htmlStr = DataUtil.doGet(urlStr, "UTF-8");
Document doc = Jsoup.parse(htmlStr);

// 获得文章中的第一个detail
//    System.out.println(htmlStr);
Element detailEle = doc.select(".left .detail").get(0);
// 标题
Element titleEle = detailEle.select("h1.title").get(0);
News news = new News();
String title = titleEle.text();
title = AppUtil.encoding(title,"utf-8");
news.setTitle(title);
news.setType(Constant.TITLE);
newses.add(news);
// 摘要
Element summaryEle = detailEle.select("div.summary").get(0);
news = new News();
String summary = summaryEle.text();
summary = AppUtil.encoding(summary,"utf-8");
news.setSumary(summary);
news.setType(Constant.SUMMARY);
newses.add(news);
// 内容
Element contentEle = detailEle.select("div.con.news_content").get(0);
Elements childrenEle = contentEle.children();

for (Element child : childrenEle) {
Elements imgEles = child.getElementsByTag("img");
// 图片
if (imgEles.size() > 0) {
for (Element imgEle : imgEles) {
if (imgEle.attr("src").equals(""))
continue;
news = new News();
String imgLink = imgEle.attr("src");
imgLink = AppUtil.encoding(imgLink,"utf-8");
news.setImageLink(imgLink);
news.setType(Constant.IMG);
newses.add(news);
}
}
// 移除图片
imgEles.remove();

if (child.text().equals(""))
continue;

news = new News();
news.setType(Constant.CONTENT);

try {
if (child.children().size() == 1) {
Element cc = child.child(0);
if (cc.tagName().equals("b")) {
news.setType(Constant.BOLD_TITLE);
}
}

} catch (IndexOutOfBoundsException e) {
e.printStackTrace();
}
String content = child.outerHtml();
content = AppUtil.encoding(content,"utf-8");
news.setContent(content);
newses.add(news);
}
newsDto.setNewses(newses);
return newsDto;
}

}


URL的处理:

/**
 * Builds the CSDN listing URL for a news type and page number.
 */
public class URLUtil {

    public static final String NEWS_LIST_URL = "http://www.csdn.net/headlines.html";
    public static final String NEWS_LIST_URL_YIDONG = "http://mobile.csdn.net/mobile";
    public static final String NEWS_LIST_URL_YANFA = "http://sd.csdn.net/sd";
    public static final String NEWS_LIST_URL_YUNJISUAN = "http://cloud.csdn.net/cloud";
    public static final String NEWS_LIST_URL_ZAZHI = "http://programmer.csdn.net/programmer";
    public static final String NEWS_LIST_URL_YEJIE = "http://news.csdn.net/news";

    /**
     * Returns the base URL for {@code newType} followed by "/" and the page number.
     *
     * @param newType     one of the {@code Constant.NEW_TYPE_*} codes; any other value
     *                    selects {@link #NEWS_LIST_URL}
     * @param currentPage requested page; values below 1 are treated as 1
     * @return the listing URL
     */
    public static String generateUrl(int newType, int currentPage) {
        final int page = Math.max(currentPage, 1);
        return baseUrlFor(newType) + "/" + page;
    }

    /** Maps a news type code to the base address of its listing. */
    private static String baseUrlFor(int newType) {
        switch (newType) {
            case Constant.NEW_TYPE_YEJIE:
                return NEWS_LIST_URL_YEJIE;
            case Constant.NEW_TYPE_YANFA:
                return NEWS_LIST_URL_YANFA;
            case Constant.NEW_TYPE_YUNJISUAN:
                return NEWS_LIST_URL_YUNJISUAN;
            case Constant.NEW_TYPE_YIDONG:
                return NEWS_LIST_URL_YIDONG;
            case Constant.NEW_TYPE_CHENGXUYUAN:
                return NEWS_LIST_URL_ZAZHI;
            default:
                return NEWS_LIST_URL;
        }
    }
}


访问网络:

public class DataUtil {
    /**
     * Fetches {@code urlstr} with an HTTP GET and returns the response body as text.
     *
     * <p>The body is collected as bytes and decoded once at the end. Decoding each
     * 1024-byte chunk on its own corrupts any multi-byte character that straddles a
     * chunk boundary.
     *
     * @param urlstr address of the page to download
     * @param uncode name of the charset used to decode the response bytes (e.g. "UTF-8")
     * @return the decoded response body
     * @throws CommonExecption if the status code is not 200 or anything fails while
     *                         sending the request or reading the response; the underlying
     *                         exception, when there is one, is attached as the cause
     */
    public static String doGet(String urlstr, String uncode) throws CommonExecption {
        GetMethod getMethod = null;
        InputStream is = null;
        try {
            HttpClient client = new HttpClient();
            getMethod = new GetMethod(urlstr);

            getMethod.addRequestHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            getMethod.addRequestHeader("Accept-Language","zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
            // NOTE(review): Host is fixed to www.csdn.net whatever urlstr points at - confirm
            // this is intended when the method is used for other sites.
            getMethod.addRequestHeader("Host","www.csdn.net");
            getMethod.addRequestHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0");
            getMethod.addRequestHeader("Connection","keep-alive");

            int state = client.executeMethod(getMethod);
            if (state != 200) {
                throw new CommonExecption("访问网络失败");
            }

            is = getMethod.getResponseBodyAsStream();
            java.io.ByteArrayOutputStream body = new java.io.ByteArrayOutputStream();
            byte[] buf = new byte[1024];
            int len;
            while ((len = is.read(buf)) != -1) {
                body.write(buf, 0, len);
            }
            return body.toString(uncode);
        } catch (CommonExecption e) {
            // Already the right type; do not wrap it a second time.
            throw e;
        } catch (Exception e) {
            throw new CommonExecption("访问网络失败", e);
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (getMethod != null) {
                getMethod.releaseConnection();
            }
        }
    }
}


关于异常处理:

/**
 * Checked exception used by the scraping classes to report a failure to their callers.
 * It offers the same four constructors as {@link Exception}.
 */
public class CommonExecption extends Exception {

    /** Creates an exception without message or cause. */
    public CommonExecption() {
        super();
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public CommonExecption(String message, Throwable cause) {
        super(message, cause);
    }

    /** @param message description of the failure */
    public CommonExecption(String message) {
        super(message);
    }

    /** @param casuse underlying exception */
    public CommonExecption(Throwable casuse) {
        super(casuse);
    }
}


二 博客园

访问网络:

public class BlogHouseDataUtil {

    /**
     * Sends an HTTP POST with paging form parameters to {@code urlStr} and returns the
     * response body decoded as UTF-8.
     *
     * <p>The category parameters (CategoryType, CategoryId, ItemListActionName) are added
     * only for the four {@code Constant.NEWS_TYPE_*} values handled below; for any other
     * {@code newType} only PageIndex and ParentCategoryId are sent.
     *
     * <p>The body is collected as bytes and decoded once, so a multi-byte UTF-8 character
     * that straddles a 1024-byte read boundary is no longer corrupted.
     *
     * @param urlStr      address to post to
     * @param currentPage value sent as the PageIndex parameter
     * @param newType     news type code selecting the category parameters
     * @return the decoded response body
     * @throws CommonExecption if the status code is not 200 or anything fails while
     *                         sending the request or reading the response; the underlying
     *                         exception, when there is one, is attached as the cause
     */
    public static String doGet(String urlStr, int currentPage, int newType) throws CommonExecption {
        PostMethod post = null;
        InputStream is = null;
        try {
            HttpClient client = new HttpClient();
            post = new PostMethod(urlStr);
            switch (newType) {
                case Constant.NEWS_TYPE_HOME:
                    post.addParameter("CategoryType", "SiteHome");
                    post.addParameter("CategoryId", String.valueOf(808));
                    post.addParameter("ItemListActionName", "PostList");
                    break;
                case Constant.NEWS_TYPE_PICK:
                    post.addParameter("CategoryType", "Picked");
                    post.addParameter("CategoryId", String.valueOf(-2));
                    post.addParameter("ItemListActionName", "PostList");
                    break;
                case Constant.NEWS_TYPE_CANDIDATE:
                    post.addParameter("CategoryType", "HomeCandidate");
                    post.addParameter("CategoryId", String.valueOf(108697));
                    post.addParameter("ItemListActionName", "PostList");
                    break;
                case Constant.NEWS_TYPE_NEWS:
                    post.addParameter("CategoryType", "News");
                    post.addParameter("CategoryId", String.valueOf(-1));
                    post.addParameter("ItemListActionName", "NewsList");
                    break;
                default:
                    // Unknown type: no category parameters are sent.
                    break;
            }

            post.addParameter("PageIndex", String.valueOf(currentPage));
            post.addParameter("ParentCategoryId", String.valueOf(0));

            int state = client.executeMethod(post);
            if (state != 200) {
                throw new CommonExecption("访问网络失败!");
            }

            is = post.getResponseBodyAsStream();
            java.io.ByteArrayOutputStream body = new java.io.ByteArrayOutputStream();
            byte[] buf = new byte[1024];
            int len;
            while ((len = is.read(buf)) != -1) {
                body.write(buf, 0, len);
            }
            return body.toString("UTF-8");
        } catch (CommonExecption e) {
            // Already the right type; do not wrap it a second time.
            throw e;
        } catch (Exception e) {
            // The cause travels with the exception, so no separate printStackTrace().
            throw new CommonExecption("访问网络失败!", e);
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (post != null) {
                post.releaseConnection();
            }
        }
    }
}


URL处理

/**
 * Builds cnblogs listing URLs of the form {@code <base>#p<page>}.
 */
public class Blog_URLUtil {

    /** Home page. */
    public static final String HOME_URL="http://www.cnblogs.com/#p";
    /** Picked posts. */
    public static final String PICK_URL="http://www.cnblogs.com/pick/#p";
    /** Candidate posts. */
    public static final String CANDIDATE_URL="http://www.cnblogs.com/candidate/#p";
    /** News. */
    public static final String NEWS_URL="http://www.cnblogs.com/news/#p";

    /**
     * Generates the URL for an article type and page number.
     *
     * @param newsType    one of the four {@code Constant.NEWS_TYPE_*} codes handled here;
     *                    for any other value the result is only the page number
     * @param currentPage requested page; values below 1 are treated as 1
     * @return the base URL of the type with the page number appended
     */
    public static String generateUrl(int newsType, int currentPage)
    {
        final int page = Math.max(currentPage, 1);
        return baseUrlFor(newsType) + page;
    }

    /** Maps a type code to its base URL; unknown codes map to the empty string. */
    private static String baseUrlFor(int newsType)
    {
        switch (newsType)
        {
            case Constant.NEWS_TYPE_HOME:
                return HOME_URL;
            case Constant.NEWS_TYPE_PICK:
                return PICK_URL;
            case Constant.NEWS_TYPE_CANDIDATE:
                return CANDIDATE_URL;
            case Constant.NEWS_TYPE_NEWS:
                return NEWS_URL;
            default:
                return "";
        }
    }

}


抓取文章 :

/**
 * Scrapes a cnblogs listing page into {@link NewsItem} objects.
 */
public class NewItemBlogHouse {

    /**
     * Downloads the listing for {@code newsType} / {@code currentPage} and converts every
     * element with class "post_item_body" into a {@link NewsItem}.
     *
     * @param newsType    type code; forwarded to the URL builder and the downloader and
     *                    stored on every item
     * @param currentPage page number to request
     * @return the parsed items in document order
     * @throws CommonExecption propagated from {@code BlogHouseDataUtil.doGet}
     * @throws IndexOutOfBoundsException if an entry lacks the expected h3 / div / p
     *                                   structure (only the picture lookup is guarded)
     */
    public List<NewsItem> getNewsItems(int newsType, int currentPage) throws CommonExecption {
        final String pageUrl = Blog_URLUtil.generateUrl(newsType, currentPage);
        final String html = BlogHouseDataUtil.doGet(pageUrl, currentPage, newsType);
        System.out.println("htmlStr------" + html);

        final List<NewsItem> result = new ArrayList<NewsItem>();
        final Elements posts = Jsoup.parse(html).getElementsByClass("post_item_body");
        for (Element post : posts) {
            final NewsItem item = new NewsItem();
            item.setNewsType(newsType);

            // Title and link come from the first child of the first <h3>.
            final Element anchor = post.getElementsByTag("h3").get(0).child(0);
            final String title = AppUtil.encoding(anchor.text(), "utf-8");
            final String href = AppUtil.encoding(anchor.attr("href"), "utf-8");
            item.setLink(href);
            item.setTitle(title);

            // All <div> matches of this entry; index 1 carries the date text,
            // index 0 the picture and the summary paragraph.
            final Elements divs = post.getElementsByTag("div");
            item.setDate(AppUtil.encoding(divs.get(1).text(), "utf-8"));

            try {
                // The picture is optional: it is the first child of the second <a>.
                final Element image = divs.get(0).getElementsByTag("a").get(1).child(0);
                item.setPicLink(AppUtil.encoding(image.attr("src"), "utf-8"));
            } catch (IndexOutOfBoundsException e) {
                System.out.println("没有图片");
            }

            final Element summary = divs.get(0).getElementsByTag("p").get(0);
            item.setContent(AppUtil.encoding(summary.text(), "utf-8"));
            result.add(item);
        }
        return result;
    }

}


三 51CTO

网络请求:

public class DataUtil {
    /**
     * Fetches {@code urlstr} with an HTTP GET and returns the response body as text.
     *
     * <p>The body is collected as bytes and decoded once at the end. Decoding each
     * 1024-byte chunk on its own corrupts any multi-byte character (UTF-8, GB2312, ...)
     * that straddles a chunk boundary.
     *
     * @param urlstr address of the page to download
     * @param uncode name of the charset used to decode the response bytes (e.g. "GB2312")
     * @return the decoded response body
     * @throws CommonExecption if the status code is not 200 or anything fails while
     *                         sending the request or reading the response; the underlying
     *                         exception, when there is one, is attached as the cause
     */
    public static String doGet(String urlstr, String uncode) throws CommonExecption {
        GetMethod getMethod = null;
        InputStream is = null;
        try {
            HttpClient client = new HttpClient();
            getMethod = new GetMethod(urlstr);

            getMethod.addRequestHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            getMethod.addRequestHeader("Accept-Language","zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
            // NOTE(review): Host is fixed to www.csdn.net whatever urlstr points at - confirm
            // this is intended when the method is used for other sites.
            getMethod.addRequestHeader("Host","www.csdn.net");
            getMethod.addRequestHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0");
            getMethod.addRequestHeader("Connection","keep-alive");

            int state = client.executeMethod(getMethod);
            if (state != 200) {
                throw new CommonExecption("访问网络失败");
            }

            is = getMethod.getResponseBodyAsStream();
            java.io.ByteArrayOutputStream body = new java.io.ByteArrayOutputStream();
            byte[] buf = new byte[1024];
            int len;
            while ((len = is.read(buf)) != -1) {
                body.write(buf, 0, len);
            }
            return body.toString(uncode);
        } catch (CommonExecption e) {
            // Already the right type; do not wrap it a second time.
            throw e;
        } catch (Exception e) {
            throw new CommonExecption("访问网络失败", e);
        } finally {
            if (is != null) {
                try {
                    is.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (getMethod != null) {
                getMethod.releaseConnection();
            }
        }
    }
}


URL处理

/**
 * Builds 51CTO blog listing URLs of the form {@code <base>/<page>}.
 */
public class CTO_URLUtil {

    public static final String FIRST_URL="http://blog.51cto.com/artcommend";
    /** Network development. */
    public static final String NETWORK_URL="http://blog.51cto.com/artcommend/14";
    /** Development technology. */
    public static final String DEVELOP_URL="http://blog.51cto.com/artcommend/8";
    /** IT management. */
    public static final String ADMIN_URL="http://blog.51cto.com/artcommend/9";
    /** IT life. */
    public static final String LIFE_URL="http://blog.51cto.com/artcommend/12";

    /**
     * Generates the URL for an article type and page number.
     *
     * @param newsType    one of the four {@code Constant.NEWS_TYPE_*} codes handled here;
     *                    for any other value the result is only "/" plus the page number
     * @param currentPage requested page; values below 1 are treated as 1
     * @return the base URL of the type followed by "/" and the page number
     */
    public static String generateUrl(int newsType, int currentPage)
    {
        final int page = Math.max(currentPage, 1);
        return baseUrlFor(newsType) + "/" + page;
    }

    /** Maps a type code to its base URL; unknown codes map to the empty string. */
    private static String baseUrlFor(int newsType)
    {
        switch (newsType)
        {
            case Constant.NEWS_TYPE_NETWORK:
                return NETWORK_URL;
            case Constant.NEWS_TYPE_DEVELOPMENT:
                return DEVELOP_URL;
            case Constant.NEWS_TYPE_IT_ADMIN:
                return ADMIN_URL;
            case Constant.NEWS_TYPE_IT_LIFE:
                return LIFE_URL;
            default:
                return "";
        }
    }

}


抓取文章 :

/**
 * Scrapes a 51CTO blog listing page into {@link NewsItem} objects.
 */
public class NewItem51CTO {
    /**
     * Downloads the listing for {@code newsType} / {@code currentPage} (decoded as
     * GB2312) and converts every element with class "r_li" into a {@link NewsItem}.
     * Used for the development, network, IT management and IT life listings.
     *
     * @param newsType    type code; selects the listing URL and is stored on every item
     * @param currentPage page number to request
     * @return the parsed items in document order
     * @throws CommonExecption propagated from {@code DataUtil.doGet}
     * @throws IndexOutOfBoundsException if an entry lacks the expected h4 / div / p / span
     *                                   structure (only the picture lookup is guarded)
     */
    public List<NewsItem> getNewsItems(int newsType, int currentPage) throws CommonExecption {
        final String pageUrl = CTO_URLUtil.generateUrl(newsType, currentPage);
        final String html = DataUtil.doGet(pageUrl, "GB2312");

        final List<NewsItem> result = new ArrayList<>();
        final Elements entries = Jsoup.parse(html).getElementsByClass("r_li");

        for (Element entry : entries) {
            final NewsItem item = new NewsItem();
            item.setNewsType(newsType);

            // Title and link come from the first child of the first <h4>.
            final Element anchor = entry.getElementsByTag("h4").get(0).child(0);
            final String title = AppUtil.encoding(anchor.text(), "utf-8");
            final String href = AppUtil.encoding(anchor.attr("href"), "utf-8");
            item.setLink(href);
            item.setTitle(title);

            // All <div> matches of this entry; index 2 carries the date,
            // index 1 the picture and the summary paragraph.
            final Elements divs = entry.getElementsByTag("div");
            final Element dateParagraph = divs.get(2).getElementsByTag("p").get(0);
            final String dateText = dateParagraph.getElementsByTag("span").get(0).text();
            item.setDate(AppUtil.encoding(dateText, "utf-8"));

            try {
                // The picture is optional: it is the first child of the first <a>.
                final Element image = divs.get(1).getElementsByTag("a").get(0).child(0);
                item.setPicLink(AppUtil.encoding(image.attr("src"), "utf-8"));
            } catch (IndexOutOfBoundsException e) {
                System.out.println("数组边界异常");
            }

            final Element summary = divs.get(1).getElementsByTag("p").get(0);
            item.setContent(AppUtil.encoding(summary.text(), "utf-8"));
            result.add(item);
        }
        return result;
    }

}


四 ITeye

网络请求 :

/**
 * Downloads ITeye pages, either through {@link HttpURLConnection} ({@link #doGet1}) or
 * through Commons HttpClient ({@link #doGet}).
 */
public class ITeyeDataUtil {

    /** Selector value that maps to the first entry of {@link #USER_AGENTS}. */
    private static final int FIRST_USER_AGENT_NUM = 14;

    /**
     * User-Agent values selectable through {@code useAgentNum} 14..17, in that order.
     * NOTE(review): the first entry has no closing parenthesis and the last one starts
     * with a space; both are kept exactly as they were - confirm whether that is intended.
     */
    private static final String[] USER_AGENTS = new String[]{
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
            "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
            " Mozilla/4.0 (compatible; MSIE 6.0; ) Opera/UCWEB7.0.2.37/28/999"};

    /**
     * Fetches {@code urlStr} with an HTTP GET over {@link HttpURLConnection} and returns
     * the body decoded as UTF-8.
     *
     * <p>Fixes compared with the previous version:
     * <ul>
     *   <li>The User-Agent header is set before the response code is read. Reading the
     *       code first opens the connection, after which {@code setRequestProperty}
     *       throws {@code IllegalStateException}.</li>
     *   <li>{@code setDoOutput(true)} is no longer called; a GET sends no request body.</li>
     *   <li>The body is decoded once from all bytes, so a multi-byte character that
     *       straddles a 1024-byte read boundary is not corrupted.</li>
     *   <li>The stream is closed and the connection disconnected on failure as well.</li>
     * </ul>
     *
     * @param urlStr      address of the page to download
     * @param currentPage not used by this method
     * @param newType     not used by this method
     * @param useAgentNum 14..17 selects one of four User-Agent headers; any other value
     *                    leaves the header unset
     * @return the decoded response body
     * @throws CommonExecption if the status code is not 200 or anything fails while
     *                         sending the request or reading the response; the underlying
     *                         exception, when there is one, is attached as the cause
     */
    public static String doGet1(String urlStr,int currentPage,int newType,int useAgentNum) throws CommonExecption
    {
        HttpURLConnection conn = null;
        InputStream is = null;
        try
        {
            URL url = new URL(urlStr);
            conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(8000);
            conn.setDoInput(true);

            // Switch the User-Agent; this must happen before the connection is opened.
            int agentIndex = useAgentNum - FIRST_USER_AGENT_NUM;
            if (agentIndex >= 0 && agentIndex < USER_AGENTS.length)
            {
                conn.setRequestProperty("User-Agent", USER_AGENTS[agentIndex]);
            }

            int code = conn.getResponseCode();
            System.out.println("code-----" + code);
            if (code != HttpURLConnection.HTTP_OK)
            {
                throw new CommonExecption("访问网络失败!");
            }

            is = conn.getInputStream();
            java.io.ByteArrayOutputStream body = new java.io.ByteArrayOutputStream();
            byte[] buf = new byte[1024];
            int len;
            while ((len = is.read(buf)) != -1)
            {
                body.write(buf, 0, len);
            }
            return body.toString("UTF-8");
        } catch (CommonExecption e)
        {
            // Already the right type; do not wrap it a second time.
            throw e;
        } catch (Exception e)
        {
            throw new CommonExecption("访问网络失败!", e);
        } finally
        {
            closeQuietly(is);
            if (conn != null)
            {
                conn.disconnect();
            }
        }
    }

    /**
     * Fetches {@code urlStr} with an HTTP GET through Commons HttpClient, sending
     * browser-like headers and a Referer of the ITeye news page, and returns the body
     * with every line terminated by "\n".
     *
     * <p>The body stream is requested once and decoded explicitly as UTF-8, the charset
     * {@link #doGet1} uses, instead of the platform default.
     *
     * @param urlStr address of the page to download
     * @return the response body, line by line, each followed by "\n"
     * @throws CommonExecption if the status code is not 200 or anything fails while
     *                         sending the request or reading the response; the underlying
     *                         exception, when there is one, is attached as the cause
     */
    public static String doGet(String urlStr) throws CommonExecption
    {
        GetMethod getMethod = null;
        BufferedReader reader = null;
        try
        {
            HttpClient client = new HttpClient();
            getMethod = new GetMethod(urlStr);

            getMethod.addRequestHeader("User-Agent","Mozilla/5.0 (Windows NT 6.1; rv:43.0) Gecko/20100101 Firefox/43.0");
            getMethod.addRequestHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            getMethod.addRequestHeader("Accept-Language","zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
            getMethod.addRequestHeader("Host","www.iteye.com");
            getMethod.addRequestHeader("Connection","keep-alive");
            getMethod.addRequestHeader("Referer","http://www.iteye.com/news");

            client.getParams().setParameter("http.protocol.cookie-policy", CookiePolicy.BROWSER_COMPATIBILITY);
            int state = client.executeMethod(getMethod);

            System.out.println("state2-----"+state);
            if (state != 200)
            {
                throw new CommonExecption("访问网络失败");
            }

            reader = new BufferedReader(
                    new InputStreamReader(getMethod.getResponseBodyAsStream(), "UTF-8"));
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null)
            {
                sb.append(line).append("\n");
            }
            return sb.toString();
        } catch (CommonExecption e)
        {
            // Already the right type; do not wrap it a second time.
            throw e;
        } catch (Exception e)
        {
            throw new CommonExecption("访问网络失败", e);
        } finally
        {
            if (reader != null)
            {
                try
                {
                    reader.close();
                } catch (java.io.IOException ignored)
                {
                    // Nothing useful can be done if closing fails.
                }
            }
            if (getMethod != null)
            {
                getMethod.releaseConnection();
            }
        }
    }

    /** Closes {@code in} if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(InputStream in)
    {
        if (in == null)
        {
            return;
        }
        try
        {
            in.close();
        } catch (java.io.IOException ignored)
        {
            // Nothing useful can be done if closing fails.
        }
    }
}


URL处理:

/**
 * Builds ITeye listing URLs of the form {@code <base>?page=<page>}.
 */
public class ITEYE_URLUtil {
    /** News. */
    public static final String NEWS_URL="http://www.iteye.com/news?page=";
    /** Magazines (featured). */
    public static final String MAGAZINES_URL="http://www.iteye.com/magazines?page=";
    /** Blogs. */
    public static final String BLOG_URL="http://www.iteye.com/blogs?page=";
    /** Blog columns (subjects). */
    public static final String SUBJECTS_URL="http://www.iteye.com/blogs/subjects?page=";

    /**
     * Generates the URL for an article type and page number.
     *
     * @param newsType    one of the four {@code Constant.NEWS_TYPE_*} codes handled here;
     *                    for any other value the result is only the page number
     * @param currentPage requested page; values below 1 are treated as 1
     * @return the base URL of the type with the page number appended
     */
    public static String generateUrl(int newsType, int currentPage)
    {
        final int page = Math.max(currentPage, 1);
        return baseUrlFor(newsType) + page;
    }

    /** Maps a type code to its base URL; unknown codes map to the empty string. */
    private static String baseUrlFor(int newsType)
    {
        switch (newsType)
        {
            case Constant.NEWS_TYPE_NEW:
                return NEWS_URL;
            case Constant.NEWS_TYPE_MAGAZINES:
                return MAGAZINES_URL;
            case Constant.NEWS_TYPE_BLOGS:
                return BLOG_URL;
            case Constant.NEWS_TYPE_SUBJECTS:
                return SUBJECTS_URL;
            default:
                return "";
        }
    }

}


抓取文章 :

public class NewItemITeye {

public List<NewsItem> getNewsItems(int newsType, int currentPage, int useAgentNum)
throws CommonExecption {
String urlStr = ITEYE_URLUtil.generateUrl(newsType, currentPage);
String htmlStr = ITeyeDataUtil.doGet1(urlStr, newsType, currentPage, 12);
List<NewsItem> newsItems = new ArrayList<NewsItem>();
NewsItem newsItem = null;

Document doc = Jsoup.parse(htmlStr);
Elements units = doc.getElementsByClass("content");

for (int i = 0; i < units.size(); i++) {
newsItem = new NewsItem();
newsItem.setNewsType(newsType);

Element unit_ele = units.get(i);

Element h3_ele = unit_ele.getElementsByTag("h3").get(0);
//解析时间
Element span_ele = null;
switch (newsType) {
case Constant.NEWS_TYPE_NEW:
Element a_ele = h3_ele.getElementsByTag("a").get(1);

String title = a_ele.text();
title = AppUtil.encoding(title, "utf-8");
newsItem.setTitle(title);
String href = a_ele.attr("href");
href = AppUtil.encoding(href, "utf-8");
StringBuffer sb = new StringBuffer();
sb.append("http://www.iteye.com").append(href);
newsItem.setLink(sb.toString());

Element div_ele = unit_ele.getElementsByTag("div").get(3);
if (div_ele.getElementsByTag("span").size() >= 3) {
span_ele = div_ele.getElementsByTag("span").get(2);
} else {
span_ele = div_ele.getElementsByTag("span").get(1);
}
//获取图片链接
try {// 可能没有图片
Element img_ele = h3_ele.child(0);
String imgLink = img_ele.attr("src");
// System.out.println(imgLink);
imgLink = AppUtil.encoding(imgLink, "utf-8");
newsItem.setPicLink(imgLink);
} catch (IndexOutOfBoundsException e) {
System.out.println("没有图片");
}
break;
case Constant.NEWS_TYPE_BLOGS:
Element a_ele1 = h3_ele.getElementsByTag("a").get(1);
// System.out.println("a_ele----------->"+a_ele.toString());
String title1 = a_ele1.text();
title1 = AppUtil.encoding(title1, "utf-8");
newsItem.setTitle(title1);
String href1 = a_ele1.attr("href");
href1 = AppUtil.encoding(href1, "utf-8");
newsItem.setLink(href1);
Element div_ele2 = unit_ele.getElementsByTag("div").get(4);
if (div_ele2.getElementsByTag("span").size() >= 3) {
span_ele = div_ele2.getElementsByTag("span").get(4);
} else {
span_ele = div_ele2.getElementsByTag("span").get(1);
}
//获取图片链接
try {// 可能没有图片
Element img_ele = unit_ele.getElementsByTag("div").get(2);
Element a1_ele = img_ele.getElementsByTag("a").get(0);
Element img = a1_ele.getElementsByTag("img").get(0);
String imgLink = img.attr("src");
imgLink = AppUtil.encoding(imgLink, "utf-8");
newsItem.setPicLink(imgLink);
// System.out.println("img--------"+imgLink);
} catch (IndexOutOfBoundsException e) {
System.out.println("没有图片");
}
break;
case Constant.NEWS_TYPE_MAGAZINES:
Element a_ele2 = h3_ele.getElementsByTag("a").get(0);
String title2 = a_ele2.text();
title2 = AppUtil.encoding(title2, "utf-8");
newsItem.setTitle(title2);
String href2 = a_ele2.attr("href");
href2 = AppUtil.encoding(href2, "utf-8");
StringBuffer sb_href = new StringBuffer();
sb_href.append("http://www.iteye.com").append(href2);
//System.out.println("sb_href-------"+sb_href.toString());
newsItem.setLink(sb_href.toString());

Element div_ele3 = unit_ele.getElementsByTag("div").get(3);
if (div_ele3.getElementsByTag("span").size() >= 3) {
span_ele = div_ele3.getElementsByTag("span").get(2);
} else {
span_ele = div_ele3.getElementsByTag("span").get(1);
}
//获取图片链接
try {// 可能没有图片
Element img_ele = h3_ele.child(0);
String imgLink = img_ele.attr("src");
// System.out.println(imgLink);
imgLink = AppUtil.encoding(imgLink, "utf-8");
newsItem.setPicLink(imgLink);
} catch (IndexOutOfBoundsException e) {
System.out.println("没有图片");
}
break;
case Constant.NEWS_TYPE_SUBJECTS:
Element a_ele3 = h3_ele.getElementsByTag("a").get(1);
// System.out.println("a_ele----------->"+a_ele.toString());
String title3 = a_ele3.text();
title3 = AppUtil.encoding(title3, "utf-8");
newsItem.setTitle(title3);
String href3 = a_ele3.attr("href");
href3 = AppUtil.encoding(href3, "utf-8");
Element a_ele_3 = h3_ele.getElementsByTag("a").get(1);
// System.out.println("a_ele----------->"+a_ele.toString());
newsItem.setLink(href3);
Element div_ele1 = unit_ele.getElementsByTag("div").get(4);
if (div_ele1.getElementsByTag("span").size() >= 3) {
span_ele = div_ele1.getElementsByTag("span").get(1);
} else {
span_ele = div_ele1.getElementsByTag("span").get(2);
}
//获取图片链接
try {// 可能没有图片
Element img_ele = unit_ele.getElementsByTag("div").get(2);
Element a1_ele = img_ele.getElementsByTag("a").get(0);
Element img = a1_ele.getElementsByTag("img").get(0);
String imgLink = img.attr("src");
imgLink = AppUtil.encoding(imgLink, "utf-8");
newsItem.setPicLink(imgLink);
} catch (IndexOutOfBoundsException e) {
System.out.println("没有图片");
}
break;

}

String date = span_ele.text();
date = AppUtil.encoding(date, "utf-8");
StringBuffer date_buffer = new StringBuffer();
date_buffer.append("发布于").append(" ").append(date);
newsItem.setDate(date_buffer.toString());

Element h1_ele = unit_ele.getElementsByTag("div").get(1);
String content = h1_ele.text();
content = AppUtil.encoding(content, "utf-8");
// System.out.println("h1_ele---------->"+content);
// System.out.println("--------"+content);
newsItem.setContent(content);
newsItems.add(newsItem);
}
return newsItems;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: