jsoup抓取借书记录
2015-12-21 22:44
323 查看
package tushuguan; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Set; import org.apache.http.Header; import org.apache.http.HeaderElement; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.ParseException; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.params.ClientPNames; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.message.BasicNameValuePair; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class tushuguan { private static String LoginUrl = "http://222.200.98.171:81/login.aspx"; private static String Host = "http://222.200.98.171:81"; private static String mainUrl = ""; private static String borrowedBooksUrl = ""; private static String cookie = ""; private static String location = ""; /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub getMyBorrowedBooks(); } public static void getMyBorrowedBooks() { try { Document document = Jsoup.parse(login()); Elements elements1 = document .getElementsContainingOwnText("当前借阅情况和续借");// 通过text关键字找到所要的<a>标签 String url = elements1.first().attr("href"); for(int i=1;i<=4;i++){ borrowedBooksUrl = "http://222.200.98.171:81/user/bookborrowedhistory.aspx?page="+i;// 取值和mainUrl进行拼凑组织借阅情况地址 System.out.println("链接如下:"+borrowedBooksUrl); getBookBorrowedData(getHtml(borrowedBooksUrl)); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * 获取借书情况具体数据(List<BookEntity>) * * @param src * @return List<BookEntity> */ private static 
List<BookEntity> getBookBorrowedData(String src) { List<BookEntity> data = new ArrayList<BookEntity>(); Document document = Jsoup.parse(src); Element element = document.select("[id=UserMasterRight]").first() .getElementsByTag("table").first(); Elements elements2 = element.getElementsByTag("tr"); for (Element temp2 : elements2) { Elements elements3 = temp2.getElementsByTag("td"); BookEntity entity = new tushuguan().new BookEntity() .setIsFullData(elements3.get(4).text()) .setData2Return(elements3.get(1).text()) .setName(elements3.get(2).text()) .setData2Borrowed(elements3.get(0).text()); data.add(entity); } data.remove(0); System.out.println("借书情况\n"); for (BookEntity temp : data) { System.out.println(temp.getName() + "\n" + temp.getData2Borrowed() + "\n" + temp.getData2Return() + "\n" + temp.getIsFullData()); } return data; } /** * 图书馆登陆 * * @param context * @return 返回登陆后的界面Html代码 * @throws ClientProtocolException * @throws IOException */ public static String login() throws ClientProtocolException, IOException { List<NameValuePair> parmasList = new ArrayList<NameValuePair>(); parmasList = initLoginParmas("3113003802", "092137"); HttpPost post = new HttpPost(LoginUrl); post.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS, false); // 阻止自动重定向,目的是获取第一个ResponseHeader的Cookie和Location post.setHeader("Content-Type", "application/x-www-form-urlencoded;charset=gbk"); // 设置编码为GBK post.setEntity(new UrlEncodedFormEntity(parmasList, "GBK")); HttpResponse response = new DefaultHttpClient().execute(post); cookie = response.getFirstHeader("Set-Cookie").getValue(); // 取得cookie并保存起来 // System.out.println("cookie= " + cookie); location = response.getFirstHeader("Location").getValue(); // 重定向地址,目的是连接到主页 mainUrl = Host + location; // 构建主页地址 String html = getHtml(mainUrl); return html; } /** * 获取网页HTML源代码 * * @param url * @return * @throws ParseException * @throws IOException */ private static String getHtml(String url) throws ParseException, IOException { // TODO Auto-generated 
method stub HttpGet get = new HttpGet(url); if ("" != cookie) { get.addHeader("Cookie", cookie); } HttpResponse httpResponse = new DefaultHttpClient().execute(get); HttpEntity entity = httpResponse.getEntity(); return EntityUtils.toString(entity); } /** * 初始化参数 * * @param userName * @param passWord * @return * @throws ParseException * @throws IOException */ public static List<NameValuePair> initLoginParmas(String userName, String passWord) throws ParseException, IOException { List<NameValuePair> parmasList = new ArrayList<NameValuePair>(); HashMap<String, String> parmasMap = getLoginFormData(LoginUrl); Set<String> keySet = parmasMap.keySet(); for (String temp : keySet) { if (temp.contains("Username")) { parmasMap.put(temp, userName); } else if (temp.contains("txtPas")) { parmasMap.put(temp, passWord); } } Set<String> keySet2 = parmasMap.keySet(); System.out.println("表单内容:"); for (String temp : keySet2) { System.out.println(temp + " = " + parmasMap.get(temp)); } for (String temp : keySet2) { parmasList.add(new BasicNameValuePair(temp, parmasMap.get(temp))); } // System.out.println("initParams \n" + parmasMap); return parmasList; } /** * 获取登录表单input内容 * * @param url * @return * @throws IOException * @throws ParseException */ public static HashMap<String, String> getLoginFormData(String url) throws ParseException, IOException { Document document = Jsoup.parse(getHtml(url)); Elements element1 = document.getElementsByTag("form");// 找出所有form表单 Element element = element1.select("[method=post]").first();// 筛选出提交方法为post的表单 Elements elements = element.select("input[name]");// 把表单中带有name属性的input标签取出 HashMap<String, String> parmas = new HashMap<String, String>(); for (Element temp : elements) { parmas.put(temp.attr("name"), temp.attr("value"));// 把所有取出的input,取出其name,放入Map中 } return parmas; } class BookEntity { /** * 书名 * */ private String name; /** * 可借数 */ private String leandableNum; /** * 索引号 */ private String callNumber; /** * 作者 */ private String writer; /** * 出版社 */ 
private String publisher; /** * 还书时间 */ private String data2Return; /** * 借书时间 */ private String data2Borrowed; /** * 是否续满 */ private String isFullData; public BookEntity() { } public String getName() { return name; } public String getLeandableNum() { return leandableNum; } public String getCallNumber() { return callNumber; } public String getWriter() { return writer; } public String getPublisher() { return publisher; } public BookEntity setName(String name) { this.name = name; return this; } public BookEntity setLeandableNum(String leandableNum) { this.leandableNum = leandableNum; return this; } public BookEntity setCallNumber(String callNumber) { this.callNumber = callNumber; return this; } public BookEntity setWriter(String writer) { this.writer = writer; return this; } public BookEntity setPublisher(String publisher) { this.publisher = publisher; return this; } public String getData2Return() { return data2Return; } public String getData2Borrowed() { return data2Borrowed; } public String getIsFullData() { return isFullData; } public BookEntity setData2Return(String data2Return) { this.data2Return = data2Return; return this; } public BookEntity setData2Borrowed(String data2Borrowed) { this.data2Borrowed = data2Borrowed; return this; } public BookEntity setIsFullData(String isFullData) { this.isFullData = isFullData; return this; } } }
结果如下:
表单内容: __VIEWSTATE = /wEPDwULLTE0MjY3MDAxNzcPZBYCZg9kFgoCAQ8PFgIeCEltYWdlVXJsBRt+XGltYWdlc1xoZWFkZXJvcGFjNGdpZi5naWZkZAICDw8WAh4EVGV4dAUt5bm/5Lic5bel5Lia5aSn5a2m5Zu+5Lmm6aaG5Lmm55uu5qOA57Si57O757ufZGQCAw8PFgIfAQUcMjAxNeW5tDEy5pyIMjHml6UgIOaYn+acn+S4gGRkAgQPZBYEZg9kFgQCAQ8WAh4LXyFJdGVtQ291bnQCCBYSAgEPZBYCZg8VAwtzZWFyY2guYXNweAAM55uu5b2V5qOA57SiZAICD2QWAmYPFQMTcGVyaV9uYXZfY2xhc3MuYXNweAAM5YiG57G75a+86IiqZAIDD2QWAmYPFQMOYm9va19yYW5rLmFzcHgADOivu+S5puaMh+W8lWQCBA9kFgJmDxUDCXhzdGIuYXNweAAM5paw5Lmm6YCa5oqlZAIFD2QWAmYPFQMUcmVhZGVycmVjb21tZW5kLmFzcHgADOivu+iAheiNkOi0rWQCBg9kFgJmDxUDE292ZXJkdWVib29rc19mLmFzcHgADOaPkOmGkuacjeWKoWQCBw9kFgJmDxUDEnVzZXIvdXNlcmluZm8uYXNweAAP5oiR55qE5Zu+5Lmm6aaGZAIID2QWAmYPFQMbaHR0cDovL2xpYnJhcnkuZ2R1dC5lZHUuY24vAA/lm77kuabppobpppbpobVkAgkPZBYCAgEPFgIeB1Zpc2libGVoZAIDDxYCHwJmZAIBD2QWBAIDD2QWBAIBDw9kFgIeDGF1dG9jb21wbGV0ZQUDb2ZmZAIHDw8WAh8BZWRkAgUPZBYGAgEPEGRkFgFmZAIDDxBkZBYBZmQCBQ8PZBYCHwQFA29mZmQCBQ8PFgIfAQWlAUNvcHlyaWdodCAmY29weTsyMDA4LTIwMDkuIFNVTENNSVMgT1BBQyA0LjAxIG9mIFNoZW56aGVuIFVuaXZlcnNpdHkgTGlicmFyeS4gIEFsbCByaWdodHMgcmVzZXJ2ZWQuPGJyIC8+54mI5p2D5omA5pyJ77ya5rex5Zyz5aSn5a2m5Zu+5Lmm6aaGIEUtbWFpbDpzenVsaWJAc3p1LmVkdS5jbmRkZBFPBFe3T/k7AJVSx8iKDmNVbdHT ctl00$ContentPlaceHolder1$txtPas_Lib = 你猜你猜 ctl00$ContentPlaceHolder1$btnLogin_Lib = 登录 ctl00$ContentPlaceHolder1$txtlogintype = 0 ctl00$ContentPlaceHolder1$txtUsername_Lib = 3113003802 __EVENTVALIDATION = /wEWBQKs47i8AwKOmK5RApX9wcYGAsP9wL8JAqW86pcIDebecgohSzUlmvgecvTU4k49zAw= 链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=1 借书情况 回乡记 [专著]/贺雪峰主编 2015-09-01 2015-10-15 A3226253 土木工程CAD基础 [专著]:AutoCAD软件基础教程=CAD foundation of civil engineering:AutoCAD software basic course/邓芃主编 2015-07-20 2015-10-15 A3138201 李光耀传 [专著]/凌翔著 2015-07-20 2015-10-15 A3210306 工程CAD基础理论与上机操作习题集 [专著]/于奕峰,杨松林主编 2015-07-20 2015-10-15 A3258522 消失的17岁 [专著]/(美) 诺瓦·伦·苏玛著=17 & gone/Nova Ren Suma;刘丽洁译 2015-06-03 2015-09-01 A3213437 汤姆叔叔的小屋 [专著]=Uncle tom's cabin:插图·中文导读英文版/(美)比彻·斯托夫人著;王勋,纪飞等编译 2015-03-27 2015-06-01 
A3002490 商务口译 [专著]=Business interpreting/刘建珠主编 2015-03-27 2015-06-01 A3003500 2014年季度精选集 [汇编]·春季卷/《读者·乡土人文版》编辑部主编 2015-03-27 2015-06-01 A3210150 可口可乐不规则营销 [专著]/(美)洛威尔著;龙文元译 2015-03-17 2015-06-16 A1501833 工程经济学 [专著]/关罡, 郝彤主编 2015-03-17 2015-04-29 A3109697 链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=2 借书情况 讴歌母爱 关注人生 [专著]:冰心小说全集/冰 心著 2015-03-03 2015-06-01 A5143376 林徽因小说:九十九度中/林徽因[著];陈学勇编选 2015-03-03 2015-06-01 A5188772 骆驼祥子·黑白李 [专著]/老舍著 2015-03-03 2015-06-01 A0957524 1937年的爱情 [专著]/叶兆言著 2014-11-27 2015-01-10 A1509614 理工大风流往事 [专著]/zt著 2014-11-27 2014-12-16 A1847222 酒殇 [专著]:一个酒业王国的兴衰/杨小凡著 2014-11-27 2015-01-10 A1948680 那时年少 [专著]/一草著 2014-11-27 2014-12-16 A2992422 不能承受的生命之轻 [专著]/(捷克斯洛伐克)米兰·昆德拉(Milan Kundera)著=L'insoutenable legerete de l'etre/许钧译 2014-11-18 2015-01-10 A0520872 读者精华本 [汇编]/万文海主编 2014-11-18 2015-01-10 A1547276 谁在让子弹飞 [专著]/曹保印著 2014-11-18 2014-12-16 A3147373 链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=3 借书情况 孤独是不人道的 [专著]/郭鹏著 2014-11-18 2014-12-16 A3147367 且听风吟 [专著]/(日)村上春树著;林少华译 2014-09-23 2014-11-10 A2516969 可怕的巧合 [专著]/石岩编著 2014-09-23 2014-11-13 A3158433 你好,总统 [专著]:乌戈·查韦斯与他的委内瑞拉=Comandante:inside Hugo Chavez's venezuela/(英)洛里·卡洛尔(Rory Carroll)著;徐天鹏译 2014-09-23 2014-11-13 A3129490 肝胆相照 [专著]:吴孟超传/方鸿辉著 2014-09-23 2014-11-10 A3139385 林徽因经典作品 [专著]:你是人间的四月天九十九度中/林徽因著 2014-05-19 2014-07-10 A2386519 梁思成的山河岁月 [专著]/林与舟编著 2014-05-19 2014-05-27 A1210449 人物中国 [汇编]/龚莉主编;《人物中国》编委会编 2014-05-19 2014-07-10 A2603584 百年大案追踪 [专著]/郭学德,崔爱鹏,李海涛著 2014-04-24 2014-06-11 A0283139 聚焦名人名案 [专著]/窦欣平,叶知秋著 2014-04-24 2014-06-11 A0547714 链接如下:http://222.200.98.171:81/user/bookborrowedhistory.aspx?page=4 借书情况 孙子兵法经典故事 [专著]/李济生编著 2014-04-24 2014-06-17 A0565277 危险游戏 [汇编]:典型犯罪案例评说/郭春孚,张翔鹰主编 2014-04-24 2014-06-03 A1360621 家庭常用药物手册 [专著]/白禾夏主编 2014-03-17 2014-03-25 A0483737 药用观赏植物栽培与利用 [专著]/张永清编著 2014-03-17 2014-04-13 A0614935 排毒不如无毒 [专著]:远离生活中的有毒物质/(美) 黛布拉·林恩·戴德著 ;常媛译=Toxic free: how to protect your health and home from the chemicals that are making you 
sick 2014-03-17 2014-03-25 A3116154 新版以案说法 [专著]/曾宪义总主编 2014-02-27 2014-03-17 A1595640 飞去的诗人:徐志摩传 [专著]/展望之,张方晦著 2014-02-25 2014-03-20 A8152588 高四凶猛 [专著]/耿萧著 2014-02-25 2014-02-27 A0547642
其实这篇是转载的,我只是在原文的基础上改了一点东西而已。原文地址:http://my.oschina.net/dfsfsdf/blog/116279?fromerr=jQsroe5A
相关文章推荐
- JS入门经典笔记
- js使用file上传图片-----(1)选择文件类型的判断和图片的显示
- How to check the version of JSF
- JS对象转化为JSON字符串
- JS_数据类型
- javascript 浏览器调用原理
- js的一些学习笔记2
- 玩玩EXPRESSJS
- JSP第二篇
- Javascript数组常用方法
- JS入门笔记
- javascript大神修炼记(7)——OOP思想(多态)
- js回调函数
- JavaScript具有自动垃圾回收机制
- 用js实现同一页面多个不同运动效果2
- JavaScript 开发进阶:理解 JavaScript 作用域和作用域链
- javascript作用域链-(1)
- 用js实现同一页面多个不同运动效果
- JS、C#编码解码
- yformater - chrome谷歌浏览器json格式化json高亮json解析插件