微信开发纪实之数字图书馆服务
2015-02-15 20:02
211 查看
微信开发纪实之数字图书馆服务
最近在做微信的后台开发,看到有些高校的微信公众平台上有查询借阅和续借的功能,自己闲来无事也瞎鼓捣了一个,现在把成果贴出来和大家一起分享。
由于本人在武汉大学,故以武汉大学图书馆为例。
思路:
1. 模拟登录
2. 抓取数据
语言:java
代码:DigitalLibraryService.java
packagecom.wdyx.weixin.service;
importjava.io.IOException;
importjava.util.ArrayList;
importjava.util.HashMap;
importjava.util.List;
importjava.util.Set;
importorg.apache.http.HttpEntity;
importorg.apache.http.HttpResponse;
importorg.apache.http.NameValuePair;
importorg.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
importorg.apache.http.client.methods.HttpPost;
importorg.apache.http.entity.BufferedHttpEntity;
importorg.apache.http.impl.client.CloseableHttpClient;
importorg.apache.http.impl.client.HttpClients;
importorg.apache.http.message.BasicNameValuePair;
importorg.apache.http.util.EntityUtils;
importorg.jsoup.Jsoup;
importorg.jsoup.nodes.Document;
importorg.jsoup.nodes.Element;
importorg.jsoup.select.Elements;
/**
* 数字图书馆服务
* 功能:查询个人信息、借阅信息、借阅历史,以及进行续借操作
* @author帮杰
*/
public classDigitalLibraryService {
//主机地址
public static final String HOST ="http://metalib.lib.whu.edu.cn";
//登陆页面地址
public static final String LOGIN_URL ="http://apps.lib.whu.edu.cn/web/login.asp";
//表单post地址
public static final String POST_URL ="http://metalib.lib.whu.edu.cn:80/pds";
//借阅者信息
private String borrowerInfo = null;
//借阅信息
private String borrowInfo = null;
//借阅历史
private String borrowHistory = null;
//储
4000
存登录页,以备续借之需
private String renewPage = null;
//每一个客户即为一个HttpClient对象
private CloseableHttpClient httpclient= null;
//构造函数,以用户名和密码作为参数
public DigitalLibraryService(Stringusername,String password) throws ClientProtocolException, IOException{
//初始化HttpClient对象
httpclient =HttpClients.createDefault();
//解析登录页
Document doc =Jsoup.parse(login(username, password, httpclient));
//“我的借阅信息”所对应的url
String BorrowerInfoUrl =doc.getElementsContainingOwnText("我的借阅信息").first().attr("href");
//据观察有重定向;获得重定向地址
StringRelocatedBorrowerInfoUrl =HttpUtil.getHtml(BorrowerInfoUrl,httpclient).split("\'")[1];
//重定向页
StringRelocatedBorrowerInfoHtml =HttpUtil.getHtml(RelocatedBorrowerInfoUrl,httpclient);
//借阅者信息页
String BorrowerInfoHtml =HttpUtil.getHtml(HOST + HtmlUtil.getLinks(RelocatedBorrowerInfoHtml).get(0),httpclient);
//通过抓取获得借阅者信息
borrowerInfo =getBorrowerInfo(BorrowerInfoHtml);
doc =Jsoup.parse(BorrowerInfoHtml);
//"当前借阅数:"后面的数字链接指向借阅信息页,故此处取得该链接
String BorrowInfoUrl =doc.getElementsContainingOwnText("当前借阅数:").parents().select("a").attr("href").split("\'")[1];
//借阅历史页面链接
String BorrowHistoryUrl =doc.getElementsContainingOwnText("借阅历史").attr("href");
//借阅信息页
String BorrowInfoHtml =HttpUtil.getHtml(BorrowInfoUrl, httpclient);
//借阅历史页
String BorrowHistoryHtml =HttpUtil.getHtml(BorrowHistoryUrl, httpclient);
//借阅信息
borrowInfo =getBorrowInfo(BorrowInfoHtml);
//借阅历史
borrowHistory = getBorrowHistory(BorrowHistoryHtml);
//借阅信息即为能进行续借操作的页面
renewPage = BorrowInfoHtml;
}
//得到借阅者信息
public String getBorrowerInfo(){
return borrowerInfo;
}
//得到借阅信息
public String getBorrowInfo(){
return borrowInfo;
}
//得到借阅历史
public String getBorrowHistory(){
return borrowHistory;
}
//续借,返回续借结果
public String renew(){
return renewBook(renewPage,httpclient);
}
/**
* 初始化POST表单参数
*
* @param username
* @param password
* @return List<NameValuePair>
* @throws IOException
* @throws ClientProtocolException
*/
private static List<NameValuePair>initialLoginFormData(String username, String password)
throwsClientProtocolException, IOException {
List<NameValuePair> nvps = newArrayList<NameValuePair>();
HashMap<String, String> parmasMap= HtmlUtil.getLoginFormData(LOGIN_URL);
Set<String> keySet =parmasMap.keySet();
for (String temp : keySet) {
if(temp.contains("bor_id")) {
parmasMap.put(temp,username);
} else if(temp.contains("bor_verification")) {
parmasMap.put(temp,password);
}
nvps.add(newBasicNameValuePair(temp, parmasMap.get(temp)));
}
return nvps;
}
/**
* 登陆图书馆主页
*
* @param username
*@param password
* @param post_url
* @return 登陆后的图书馆主页
* @throws IOException
* @throws ClientProtocolException
*/
private static String login(Stringusername,String password,CloseableHttpClient httpclient) {
Stringhtml = "";
try{
List<NameValuePair>nvps = initialLoginFormData(username, password);
HttpPost post = newHttpPost(POST_URL);
post.setEntity(newUrlEncodedFormEntity(nvps));
HttpResponse response = httpclient.execute(post);
HttpEntity entity = newBufferedHttpEntity(response.getEntity());
html =EntityUtils.toString(entity,HtmlUtil.getCharset(EntityUtils.toString(entity)));
String mainUrl = HOST +HtmlUtil.getLinks(html).get(0);
html =HttpUtil.getHtml(mainUrl,httpclient);
}catch(ClientProtocolExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
return html;
}
/************************抽取数据**************************/
/**
* 抽取 借阅者信息
* @param BorrowerInfoHtml
* @return BorrowerInfo
*/
private static StringgetBorrowerInfo(String BorrowerInfoHtml){
StringBuffer buffer = newStringBuffer();
try{
List<String>key = new ArrayList<String>();
List<String>val = new ArrayList<String>();
Document doc =Jsoup.parse(BorrowerInfoHtml);
Elements elements =doc.select("table").select("tr");
Elements col1 =elements.select("td.td4");
Elements col2 =elements.select("td.td1");
for(Element tmp :col1){
key.add(tmp.text());
}
for(Element tmp :col2){
val.add(tmp.text());
}
buffer.append("------------您的个人信息------------\n\n");
for(inti=0;i<key.size();i++){
if(i != 3)
buffer.append(key.get(i)).append(val.get(i)).append("\n\n");
}
}catch(Exception e){
e.printStackTrace();
}
return (null == buffer) ?null : buffer.substring(0, buffer.lastIndexOf("\n\n"));
}
/**
* 抽取 借阅信息
* @param username
* @param password
* @param login_url
* @return
*/
private static StringgetBorrowInfo(String BorrowInfoHtml){
StringBuffer buffer = newStringBuffer();
try{
List<String>key = new ArrayList<String>();
List<String>val = new ArrayList<String>();
Document doc =Jsoup.parse(BorrowInfoHtml);
Elements th =doc.select("th.text3");
Elements td =doc.select("td.td1");
for(Element tmp :th){
key.add(tmp.text());
}
for(Element tmp :td){
val.add(tmp.text());
}
buffer.append("------------您的借阅信息------------\n\n");
int itemCounts =key.size();
int groupCounts =val.size()/itemCounts;
for(inti=0;i<groupCounts;i++){
for(intj=0;j<itemCounts;j++){
if(j!=1)
buffer.append(key.get(j)+(j==0?"":":")).append(val.get(i*itemCounts+j)).append("\n\n");
}
buffer.append("-----------------------------------\n\n");
}
}catch(Exception e){
e.printStackTrace();
}
return (buffer ==null)?null:buffer.substring(0, buffer.lastIndexOf("-----------------------------------\n\n"));
}
/**
* 抽取 借阅历史
* @param username
* @param password
* @param login_url
* @return
*/
private static StringgetBorrowHistory(String BorrowHistoryHtml){
StringBuffer buffer = newStringBuffer();
try{
List<String>key = new ArrayList<String>();
List<String>val = new ArrayList<String>();
Document doc =Jsoup.parse(BorrowHistoryHtml);
Elements th =doc.select("th.text3");
Elements td =doc.select("td.td1");
for(Element tmp :th){
key.add(tmp.text());
}
for(Element tmp :td){
val.add(tmp.text());
}
buffer.append("------------您的借阅历史------------\n\n");
val.remove(0);
int itemCounts =key.size();
int groupCounts =val.size()/itemCounts;
for(inti=0;i<groupCounts;i++){
for(intj=0;j<itemCounts;j++){
buffer.append(key.get(j)+(j==0?"":":")).append(val.get(i*itemCounts+j)).append("\n\n");
}
buffer.append("-----------------------------------\n\n");
}
}catch(Exception e){
e.printStackTrace();
}
return (buffer ==null)?null:buffer.substring(0,buffer.lastIndexOf("-----------------------------------\n\n"));
}
/**
* 续借
* @param renewHtml
* @param httpclient
* @return 续借结果
*/
private static String renewBook(StringrenewHtml,CloseableHttpClient httpclient){
StringBuffer buffer = newStringBuffer();
try{
Document doc
1658e
=Jsoup.parse(renewHtml);
String url =doc.getElementsContainingOwnText("全部续借").attr("href").split("\'")[1];
String resultPage =HttpUtil.getHtml(url,httpclient);
doc =Jsoup.parse(resultPage);
Elements th =doc.select("tr.tr1").select("th");
Elements td =doc.select("td.td1");
List<String>key = new ArrayList<String>();
List<String>val = new ArrayList<String>();
for(Element tmp :th){
key.add(tmp.text());
}
for(Element tmp :td){
val.add(tmp.text());
}
buffer.append("------------您的续借结果------------\n\n");
int itemCounts =key.size();
int groupCounts =val.size()/itemCounts;
for(inti=0;i<groupCounts;i++){
for(intj=0;j<itemCounts;j++){
buffer.append(key.get(j)+":").append(val.get(i*itemCounts+j)).append("\n\n");
}
buffer.append("-----------------------------------\n\n");
}
}catch(Exception e){
e.printStackTrace();
}
return (buffer ==null)?null:buffer.substring(0,buffer.lastIndexOf("-----------------------------------\n\n"));
}
//测试
public static void main(String[] args)throws Exception {
String username = "*************";
String password = "******";
DigitalLibraryServicemyDigitalLibraryService = new DigitalLibraryService(username,password);
//System.out.println("---------------------------------个人信息----------------------------------");
System.out.println(myDigitalLibraryService.getBorrowerInfo());
//System.out.println("---------------------------------借阅信息----------------------------------");
System.out.println(myDigitalLibraryService.getBorrowInfo());
//System.out.println("---------------------------------借阅历史----------------------------------");
System.out.println(myDigitalLibraryService.getBorrowHistory());
//System.out.println("---------------------------------续借结果----------------------------------");
System.out.println(myDigitalLibraryService.renew());
}
}
测试效果如下:
由于截图显示不全,我把结果贴在下面:
------------您的个人信息------------
姓??名:?莫帮杰
读者证号:?2013301200227
有效期:?20170630
当前借阅数:1
预约请求数:0
现金记录:0.00
------------您的借阅信息------------
No.1
著者:郭珍
题名:JSP程序设计教程
出版年:2012
应还日期:20150311
应还时间:22:00
罚款:
分馆:总馆图书借阅区A2-A5
索书号:TP393.092/G591c2
单册描述:
SFX:
------------您的借阅历史------------
No.1
著者:郭珍
题名:JSP程序设计教程
年:2012
应还日期:20141229
应还时间:22:00
归还日期:20141221
归还时间:19:11
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.2
著者:IBMG国际商业管理集团
题名:榜样的力量 :连锁零售企业背后的故事 :the stories of the retailers
年:2012
应还日期:20141210
应还时间:22:00
归还日期:20141124
归还时间:19:10
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.3
著者:周星潼
题名:芝麻开门 :成就阿里巴巴网络帝国的13个管理法则
年:2012
应还日期:20141210
应还时间:22:00
归还日期:20141124
归还时间:19:10
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.4
著者:赖利
题名:乐享Arduino+Android+PC创意制作
年:2014
应还日期:20141202
应还时间:22:00
归还日期:20141102
归还时间:10:42
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.5
著者:赖利
题名:乐享Arduino+Android+PC创意制作
年:2014
应还日期:20141202
应还时间:22:00
归还日期:20141124
归还时间:19:10
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.6
著者:威尔弗雷德
题名:PHP专业项目实例开发
年:2003
应还日期:20141115
应还时间:22:00
归还日期:20141108
归还时间:09:51
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.7
著者:潘凯华
题名:PHP求职宝典
年:2012
应还日期:20141114
应还时间:22:00
归还日期:20141108
归还时间:09:51
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.8
著者:赖利
题名:乐享Arduino+Android+PC创意制作
年:2014
应还日期:20141020
应还时间:22:00
归还日期:20141012
归还时间:15:14
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.9
著者:叶青
题名:网页开发手记 :HTML+CSS+JavaScript实战详解
年:2011
应还日期:20141020
应还时间:22:00
归还日期:20141012
归还时间:15:15
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.10
著者:叶青
题名:网页开发手记 :HTML+CSS+JavaScript实战详解
年:2011
应还日期:20140929
应还时间:22:00
归还日期:20140920
归还时间:18:31
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.11
著者:赖利
题名:乐享Arduino+Android+PC创意制作
年:2014
应还日期:20140929
应还时间:22:00
归还日期:20140920
归还时间:18:31
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.12
著者:刘剑
题名:51单片机开发与应用基础教程 :C语言版
年:2012
应还日期:20140911
应还时间:22:00
归还日期:20140701
归还时间:14:16
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.13
著者:熊斌
题名:Android多媒体开发技术实战详解
年:2012
应还日期:20140911
应还时间:22:00
归还日期:20140830
归还时间:19:00
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.14
著者:贝内特
题名:Objective-C初学者指南
年:2012
应还日期:20140911
应还时间:22:00
归还日期:20140830
归还时间:19:01
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.15
著者:张海霞
题名:奇思妙想的物联网 :2012年中国大学生物联网创新创业大赛获奖作品集锦
年:2013
应还日期:20140911
应还时间:22:00
归还日期:20140830
归还时间:19:01
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.16
著者:赖利
题名:乐享Arduino+Android+PC创意制作
年:2014
应还日期:20140911
应还时间:22:00
归还日期:20140830
归还时间:19:02
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.17
著者:叶青
题名:网页开发手记 :HTML+CSS+JavaScript实战详解
年:2011
应还日期:20140911
应还时间:22:00
归还日期:20140830
归还时间:19:02
罚款:
分馆:总馆图书借阅区A2-A5
------------您的续借结果------------
序号:1
描述:JSP程序设计教程
单册状态:已借出
应还日期:20150311
应还时间:22:00
分馆:总馆图书借阅区A2-A5
条码:101101630602
单册描述:
未能续借的原因:不能再续借 (还书日期没改变)。
我把代码中的HttpUtil.java也贴出来:
packagecom.wdyx.weixin.service;
importjava.io.BufferedReader;
importjava.io.IOException;
importjava.io.InputStream;
importjava.io.InputStreamReader;
importjava.net.HttpURLConnection;
importjava.net.URL;
import org.apache.http.HttpEntity;
importorg.apache.http.HttpResponse;
importorg.apache.http.client.ClientProtocolException;
importorg.apache.http.client.methods.HttpGet;
importorg.apache.http.entity.BufferedHttpEntity;
importorg.apache.http.impl.client.CloseableHttpClient;
importorg.apache.http.impl.client.HttpClients;
importorg.apache.http.util.EntityUtils;
/**
* HTTP小工具
* @author 帮杰
*
*/
public classHttpUtil {
/**
* 判断是否有重定向
* @param response
* @return boolean
*/
public static boolean isRedirected(HttpResponseresponse){
int code =response.getStatusLine().getStatusCode();
return(code==301||code==302)?true:false;
}
/**
* 得到重定向Location
* @param response
* @return String
*/
public static StringgetLocation(HttpResponse response){
returnresponse.getHeaders("Location")[0].toString();
}
/**
* 处理重定向
* @param response
* @param httpclient
* @return HttpResponse
*/
public static HttpResponsehandleRedirect(HttpResponse response,CloseableHttpClient httpclient){
if(isRedirected(response)){
try {
Stringlocation = getLocation(response);
HttpGethttpget = new HttpGet(location);
response =httpclient.execute(httpget);
} catch(ClientProtocolException e) {
// TODOAuto-generated catch block
e.printStackTrace();
} catch (IOExceptione) {
// TODOAuto-generated catch block
e.printStackTrace();
}
}
return response;
}
/**
* 新请求一个页面
*
* @param url
* @return HTML
* @throws IOException
* @throws ClientProtocolException
* @return String
*/
public static String getHtml(Stringurl) throws ClientProtocolException, IOException {
String html = "";
HttpGet httpget = newHttpGet(url);
HttpResponse response =HttpClients.createDefault().execute(httpget);
HttpEntity entity = newBufferedHttpEntity(response.getEntity());
String charset =HtmlUtil.getCharset(EntityUtils.toString(entity));
html =EntityUtils.toString(entity,charset);
return html;
}
/**
* 新请求一个页面,自动处理重定向
*
* @param url
* @return HTML
* @throws IOException
* @throws ClientProtocolException
* @return String
*/
public static String getHtml3xx(Stringurl) throws ClientProtocolException, IOException {
String html = "";
HttpGet httpget = newHttpGet(url);
CloseableHttpClienthttpclient = HttpClients.createDefault();
HttpResponse response =handleRedirect(httpclient.execute(httpget),httpclient);
HttpEntity entity = newBufferedHttpEntity(response.getEntity());
String charset =HtmlUtil.getCharset(EntityUtils.toString(entity));
html =EntityUtils.toString(entity,charset);
return html;
}
/**
* 指定client请求页面
*
* @param url
* @param httpclient
* @return HTML
* @throws IOException
* @throws ClientProtocolException
* @return String
*/
public static String getHtml(Stringurl,CloseableHttpClient httpclient) throws ClientProtocolException, IOException{
String html = "";
HttpGet httpget = newHttpGet(url);
HttpResponse response =httpclient.execute(httpget);
HttpEntity entity = newBufferedHttpEntity(response.getEntity());
String charset =HtmlUtil.getCharset(EntityUtils.toString(entity));
html =EntityUtils.toString(entity,charset);
return html;
}
/**
* 指定client请求页面,自动处理重定向
*
* @param url
* @param httpclient
* @return HTML
* @throws IOException
* @throws ClientProtocolException
* @return String
*/
public static String getHtml3xx(Stringurl,CloseableHttpClient httpclient) throws ClientProtocolException, IOException{
Stringhtml = "";
HttpGet httpget = newHttpGet(url);
HttpResponse response =handleRedirect(httpclient.execute(httpget),httpclient);
HttpEntity entity = newBufferedHttpEntity(response.getEntity());
String charset =HtmlUtil.getCharset(EntityUtils.toString(entity));
html =EntityUtils.toString(entity,charset);
return html;
}
/**
* 另一种发起http get请求获取网页源代码 的方法
* 有时候会快一些
*
* @param requestUrl
* @return String
*/
public static String httpRequest(StringrequestUrl,String charset) {
StringBuffer buffer = null;
try {
// 建立连接
URL url = new URL(requestUrl);
HttpURLConnection httpUrlConn =(HttpURLConnection) url.openConnection();
httpUrlConn.setDoInput(true);
httpUrlConn.setRequestMethod("GET");
// 获取输入流
InputStream inputStream =httpUrlConn.getInputStream();
InputStreamReader inputStreamReader= new InputStreamReader(inputStream, charset);
BufferedReader bufferedReader = newBufferedReader(inputStreamReader);
// 读取返回结果
buffer = new StringBuffer();
String str = null;
while ((str = bufferedReader.readLine())!= null) {
buffer.append(str);
}
// 释放资源
bufferedReader.close();
inputStreamReader.close();
inputStream.close();
httpUrlConn.disconnect();
} catch (Exception e) {
e.printStackTrace();
}
return buffer.toString();
}
}
还有HtmlUtil.java
packagecom.wdyx.weixin.service;
importjava.io.IOException;
importjava.net.MalformedURLException;
importjava.net.URL;
importjava.util.ArrayList;
importjava.util.HashMap;
importjava.util.List;
importorg.apache.http.client.ClientProtocolException;
importorg.jsoup.Jsoup;
importorg.jsoup.nodes.Document;
importorg.jsoup.nodes.Element;
importorg.jsoup.select.Elements;
/**
* HTML小工具
* @author 帮杰
*
*/
public classHtmlUtil {
private static final String GBK ="GBK";
private static final String GB_2312 ="GB2312";
private static final String UTF_8 ="UTF-8";
private static final String UTF_16 ="UTF-16";
private static final String ISO_8859_1= "ISO-8859-1";
private static final String ISO_8859_2= "ISO-8859-2";
private static final String ISO_8859_3= "ISO-8859-3";
private static final String ISO_8859_4= "ISO-8859-4";
private static final String ISO_8859_5= "ISO-8859-5";
private static final String ISO_8859_6= "ISO-8859-6";
private static final String ISO_8859_7= "ISO-8859-7";
private static final String ISO_8859_8= "ISO-8859-8";
private static final String ISO_8859_9= "ISO-8859-9";
private static final String ISO_8859_10= "ISO-8859-10";
private static final String ISO_8859_15= "ISO-8859-15";
private static final String ISO_2022_JP= "ISO-2022-JP";
private static final StringISO_2022_JP_2 = "ISO-2022-JP-2";
private static final String ISO_2022_KR= "ISO-2022-KR";
/**
* 列举所有字符编码
* @return List<String>
*/
private static List<String>getCharsetList(){
List<String>charsetList = new ArrayList<String>();
charsetList.add(GBK);
charsetList.add(GB_2312);
charsetList.add(UTF_8);
charsetList.add(UTF_16);
charsetList.add(ISO_8859_1);
charsetList.add(ISO_8859_2);
charsetList.add(ISO_8859_3);
charsetList.add(ISO_8859_4);
charsetList.add(ISO_8859_5);
charsetList.add(ISO_8859_6);
charsetList.add(ISO_8859_7);
charsetList.add(ISO_8859_8);
charsetList.add(ISO_8859_9);
charsetList.add(ISO_8859_10);
charsetList.add(ISO_8859_15);
charsetList.add(ISO_2022_JP);
charsetList.add(ISO_2022_JP_2);
charsetList.add(ISO_2022_KR);
return charsetList;
}
/**
* 获取HTML页面的字符集
* @param html
* @return
*/
public static String getCharset(Stringhtml){
String charSet ="utf-8";
try{
Document doc =Jsoup.parse(html);
String content =doc.getElementsByTag("meta").select("[http-equiv=Content-Type]").attr("content");
List<String>charSetList = getCharsetList();
if(content != null){
for(Stringcharset : charSetList){
if(content.contains(charset)||content.contains(charset.toLowerCase())){
charSet= charset;
break;
}
}
}
}finally{
}
return charSet;
}
/**
* 获取HTML页面的所有链接
* @param html
* @return HTML页面的所有链接
*/
public static List<String>getLinks(String html){
List<String> links =new ArrayList<String>();
try{
Document doc =Jsoup.parse(html);
Elements elements =doc.select("a[href]");
for(Element element: elements){
links.add(element.attr("href"));
}
}catch(Exception e){
e.printStackTrace();
}
return links;
}
/**
* 获取HTML页面的所有资源地址
* @param html
* @return HTML页面的所有资源地址
*/
public static List<String>getSrc(String html){
List<String> src = newArrayList<String>();
try{
Document doc =Jsoup.parse(html);
Elements elements =doc.select("[src]");
for(Element element: elements){
src.add(element.attr("src"));
}
}catch(Exception e){
e.printStackTrace();
}
return src;
}
/**
* 获取HTML登录页面的action地址
* @param login_url
* @return String
*/
public static String getAction(Stringlogin_url){
String action = "";
try {
Document doc =Jsoup.parse(new URL(login_url), 5000) ;
Elements elements =doc.getElementsByTag("form");
Element element =elements.select("[method=post]").first();
action =element.attr("action");
} catch (MalformedURLExceptione) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return action;
}
/**
* 获取登录表单input内容
*
* @param url
* @return HashMap<String, String>
* @throws IOException
* @throws ClientProtocolException
*/
public static HashMap<String, String>getLoginFormData(String login_url) throws ClientProtocolException, IOException{
Document document = Jsoup.parse(newURL(login_url), 5000) ;
Elements elements =document.getElementsByTag("form").select("[method=post]").first().select("input[name]");
HashMap<String, String> parmas =new HashMap<String, String>();
for (Element temp : elements) {
parmas.put(temp.attr("name"),temp.attr("value"));
}
return parmas;
}
}
有了获得相关信息的方法后,再和微信的消息机制整合,就可以实现在微信平台上的数字图书馆服务,具体整合方式此处不再赘述。
此处虽以武大图书馆为例,但其方法是通用的。其他学校的朋友如果想做这项服务的话只需修改少量代码即可复用。
好了,今天就到此为止了,如果有盆友有更好的思路或改进方法,请联系我,我们共同学习和探讨。我的邮箱是:mobangjack@foxmail.com
最近在做微信的后台开发,看到有些高校的微信公众平台上有查询借阅和续借的功能,自己闲来无事也瞎鼓捣了一个,现在把成果贴出来和大家一起分享。
由于本人在武汉大学,故以武汉大学图书馆为例。
思路:
1. 模拟登录
2. 抓取数据
语言:java
代码:DigitalLibraryService.java
packagecom.wdyx.weixin.service;
importjava.io.IOException;
importjava.util.ArrayList;
importjava.util.HashMap;
importjava.util.List;
importjava.util.Set;
importorg.apache.http.HttpEntity;
importorg.apache.http.HttpResponse;
importorg.apache.http.NameValuePair;
importorg.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
importorg.apache.http.client.methods.HttpPost;
importorg.apache.http.entity.BufferedHttpEntity;
importorg.apache.http.impl.client.CloseableHttpClient;
importorg.apache.http.impl.client.HttpClients;
importorg.apache.http.message.BasicNameValuePair;
importorg.apache.http.util.EntityUtils;
importorg.jsoup.Jsoup;
importorg.jsoup.nodes.Document;
importorg.jsoup.nodes.Element;
importorg.jsoup.select.Elements;
/**
* 数字图书馆服务
* 功能:查询个人信息、借阅信息、借阅历史,以及进行续借操作
* @author帮杰
*/
public classDigitalLibraryService {
//主机地址
public static final String HOST ="http://metalib.lib.whu.edu.cn";
//登陆页面地址
public static final String LOGIN_URL ="http://apps.lib.whu.edu.cn/web/login.asp";
//表单post地址
public static final String POST_URL ="http://metalib.lib.whu.edu.cn:80/pds";
//借阅者信息
private String borrowerInfo = null;
//借阅信息
private String borrowInfo = null;
//借阅历史
private String borrowHistory = null;
//储
4000
存登录页,以备续借之需
private String renewPage = null;
//每一个客户即为一个HttpClient对象
private CloseableHttpClient httpclient= null;
//构造函数,以用户名和密码作为参数
public DigitalLibraryService(Stringusername,String password) throws ClientProtocolException, IOException{
//初始化HttpClient对象
httpclient =HttpClients.createDefault();
//解析登录页
Document doc =Jsoup.parse(login(username, password, httpclient));
//“我的借阅信息”所对应的url
String BorrowerInfoUrl =doc.getElementsContainingOwnText("我的借阅信息").first().attr("href");
//据观察有重定向;获得重定向地址
StringRelocatedBorrowerInfoUrl =HttpUtil.getHtml(BorrowerInfoUrl,httpclient).split("\'")[1];
//重定向页
StringRelocatedBorrowerInfoHtml =HttpUtil.getHtml(RelocatedBorrowerInfoUrl,httpclient);
//借阅者信息页
String BorrowerInfoHtml =HttpUtil.getHtml(HOST + HtmlUtil.getLinks(RelocatedBorrowerInfoHtml).get(0),httpclient);
//通过抓取获得借阅者信息
borrowerInfo =getBorrowerInfo(BorrowerInfoHtml);
doc =Jsoup.parse(BorrowerInfoHtml);
//"当前借阅数:"后面的数字链接指向借阅信息页,故此处取得该链接
String BorrowInfoUrl =doc.getElementsContainingOwnText("当前借阅数:").parents().select("a").attr("href").split("\'")[1];
//借阅历史页面链接
String BorrowHistoryUrl =doc.getElementsContainingOwnText("借阅历史").attr("href");
//借阅信息页
String BorrowInfoHtml =HttpUtil.getHtml(BorrowInfoUrl, httpclient);
//借阅历史页
String BorrowHistoryHtml =HttpUtil.getHtml(BorrowHistoryUrl, httpclient);
//借阅信息
borrowInfo =getBorrowInfo(BorrowInfoHtml);
//借阅历史
borrowHistory = getBorrowHistory(BorrowHistoryHtml);
//借阅信息即为能进行续借操作的页面
renewPage = BorrowInfoHtml;
}
//得到借阅者信息
public String getBorrowerInfo(){
return borrowerInfo;
}
//得到借阅信息
public String getBorrowInfo(){
return borrowInfo;
}
//得到借阅历史
public String getBorrowHistory(){
return borrowHistory;
}
//续借,返回续借结果
public String renew(){
return renewBook(renewPage,httpclient);
}
/**
* 初始化POST表单参数
*
* @param username
* @param password
* @return List<NameValuePair>
* @throws IOException
* @throws ClientProtocolException
*/
private static List<NameValuePair>initialLoginFormData(String username, String password)
throwsClientProtocolException, IOException {
List<NameValuePair> nvps = newArrayList<NameValuePair>();
HashMap<String, String> parmasMap= HtmlUtil.getLoginFormData(LOGIN_URL);
Set<String> keySet =parmasMap.keySet();
for (String temp : keySet) {
if(temp.contains("bor_id")) {
parmasMap.put(temp,username);
} else if(temp.contains("bor_verification")) {
parmasMap.put(temp,password);
}
nvps.add(newBasicNameValuePair(temp, parmasMap.get(temp)));
}
return nvps;
}
/**
* 登陆图书馆主页
*
* @param username
*@param password
* @param post_url
* @return 登陆后的图书馆主页
* @throws IOException
* @throws ClientProtocolException
*/
private static String login(Stringusername,String password,CloseableHttpClient httpclient) {
Stringhtml = "";
try{
List<NameValuePair>nvps = initialLoginFormData(username, password);
HttpPost post = newHttpPost(POST_URL);
post.setEntity(newUrlEncodedFormEntity(nvps));
HttpResponse response = httpclient.execute(post);
HttpEntity entity = newBufferedHttpEntity(response.getEntity());
html =EntityUtils.toString(entity,HtmlUtil.getCharset(EntityUtils.toString(entity)));
String mainUrl = HOST +HtmlUtil.getLinks(html).get(0);
html =HttpUtil.getHtml(mainUrl,httpclient);
}catch(ClientProtocolExceptione){
e.printStackTrace();
}catch(IOExceptione){
e.printStackTrace();
}
return html;
}
/************************抽取数据**************************/
/**
* 抽取 借阅者信息
* @param BorrowerInfoHtml
* @return BorrowerInfo
*/
private static StringgetBorrowerInfo(String BorrowerInfoHtml){
StringBuffer buffer = newStringBuffer();
try{
List<String>key = new ArrayList<String>();
List<String>val = new ArrayList<String>();
Document doc =Jsoup.parse(BorrowerInfoHtml);
Elements elements =doc.select("table").select("tr");
Elements col1 =elements.select("td.td4");
Elements col2 =elements.select("td.td1");
for(Element tmp :col1){
key.add(tmp.text());
}
for(Element tmp :col2){
val.add(tmp.text());
}
buffer.append("------------您的个人信息------------\n\n");
for(inti=0;i<key.size();i++){
if(i != 3)
buffer.append(key.get(i)).append(val.get(i)).append("\n\n");
}
}catch(Exception e){
e.printStackTrace();
}
return (null == buffer) ?null : buffer.substring(0, buffer.lastIndexOf("\n\n"));
}
/**
* 抽取 借阅信息
* @param username
* @param password
* @param login_url
* @return
*/
private static StringgetBorrowInfo(String BorrowInfoHtml){
StringBuffer buffer = newStringBuffer();
try{
List<String>key = new ArrayList<String>();
List<String>val = new ArrayList<String>();
Document doc =Jsoup.parse(BorrowInfoHtml);
Elements th =doc.select("th.text3");
Elements td =doc.select("td.td1");
for(Element tmp :th){
key.add(tmp.text());
}
for(Element tmp :td){
val.add(tmp.text());
}
buffer.append("------------您的借阅信息------------\n\n");
int itemCounts =key.size();
int groupCounts =val.size()/itemCounts;
for(inti=0;i<groupCounts;i++){
for(intj=0;j<itemCounts;j++){
if(j!=1)
buffer.append(key.get(j)+(j==0?"":":")).append(val.get(i*itemCounts+j)).append("\n\n");
}
buffer.append("-----------------------------------\n\n");
}
}catch(Exception e){
e.printStackTrace();
}
return (buffer ==null)?null:buffer.substring(0, buffer.lastIndexOf("-----------------------------------\n\n"));
}
/**
* 抽取 借阅历史
* @param username
* @param password
* @param login_url
* @return
*/
private static StringgetBorrowHistory(String BorrowHistoryHtml){
StringBuffer buffer = newStringBuffer();
try{
List<String>key = new ArrayList<String>();
List<String>val = new ArrayList<String>();
Document doc =Jsoup.parse(BorrowHistoryHtml);
Elements th =doc.select("th.text3");
Elements td =doc.select("td.td1");
for(Element tmp :th){
key.add(tmp.text());
}
for(Element tmp :td){
val.add(tmp.text());
}
buffer.append("------------您的借阅历史------------\n\n");
val.remove(0);
int itemCounts =key.size();
int groupCounts =val.size()/itemCounts;
for(inti=0;i<groupCounts;i++){
for(intj=0;j<itemCounts;j++){
buffer.append(key.get(j)+(j==0?"":":")).append(val.get(i*itemCounts+j)).append("\n\n");
}
buffer.append("-----------------------------------\n\n");
}
}catch(Exception e){
e.printStackTrace();
}
return (buffer ==null)?null:buffer.substring(0,buffer.lastIndexOf("-----------------------------------\n\n"));
}
/**
* 续借
* @param renewHtml
* @param httpclient
* @return 续借结果
*/
private static String renewBook(StringrenewHtml,CloseableHttpClient httpclient){
StringBuffer buffer = newStringBuffer();
try{
Document doc
1658e
=Jsoup.parse(renewHtml);
String url =doc.getElementsContainingOwnText("全部续借").attr("href").split("\'")[1];
String resultPage =HttpUtil.getHtml(url,httpclient);
doc =Jsoup.parse(resultPage);
Elements th =doc.select("tr.tr1").select("th");
Elements td =doc.select("td.td1");
List<String>key = new ArrayList<String>();
List<String>val = new ArrayList<String>();
for(Element tmp :th){
key.add(tmp.text());
}
for(Element tmp :td){
val.add(tmp.text());
}
buffer.append("------------您的续借结果------------\n\n");
int itemCounts =key.size();
int groupCounts =val.size()/itemCounts;
for(inti=0;i<groupCounts;i++){
for(intj=0;j<itemCounts;j++){
buffer.append(key.get(j)+":").append(val.get(i*itemCounts+j)).append("\n\n");
}
buffer.append("-----------------------------------\n\n");
}
}catch(Exception e){
e.printStackTrace();
}
return (buffer ==null)?null:buffer.substring(0,buffer.lastIndexOf("-----------------------------------\n\n"));
}
/**
 * Manual smoke test: builds a service for the given account and prints the borrower
 * information, the current loans, the loan history and the renewal result.
 *
 * @param args unused
 * @throws Exception if constructing the service or any of the queries fails
 */
public static void main(String[] args) throws Exception {
    String username = "*************";
    String password = "******";
    DigitalLibraryService myDigitalLibraryService = new DigitalLibraryService(username, password);
    // Borrower (personal) information
    System.out.println(myDigitalLibraryService.getBorrowerInfo());
    // Current loans
    System.out.println(myDigitalLibraryService.getBorrowInfo());
    // Loan history
    System.out.println(myDigitalLibraryService.getBorrowHistory());
    // Renewal result
    System.out.println(myDigitalLibraryService.renew());
}
}
测试效果如下:
由于截图显示不全,我把结果贴在下面:
------------您的个人信息------------
姓??名:?莫帮杰
读者证号:?2013301200227
有效期:?20170630
当前借阅数:1
预约请求数:0
现金记录:0.00
------------您的借阅信息------------
No.1
著者:郭珍
题名:JSP程序设计教程
出版年:2012
应还日期:20150311
应还时间:22:00
罚款:
分馆:总馆图书借阅区A2-A5
索书号:TP393.092/G591c2
单册描述:
SFX:
------------您的借阅历史------------
No.1
著者:郭珍
题名:JSP程序设计教程
年:2012
应还日期:20141229
应还时间:22:00
归还日期:20141221
归还时间:19:11
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.2
著者:IBMG国际商业管理集团
题名:榜样的力量 :连锁零售企业背后的故事 :the stories of the retailers
年:2012
应还日期:20141210
应还时间:22:00
归还日期:20141124
归还时间:19:10
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.3
著者:周星潼
题名:芝麻开门 :成就阿里巴巴网络帝国的13个管理法则
年:2012
应还日期:20141210
应还时间:22:00
归还日期:20141124
归还时间:19:10
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.4
著者:赖利
题名:乐享Arduino+Android+PC创意制作
年:2014
应还日期:20141202
应还时间:22:00
归还日期:20141102
归还时间:10:42
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.5
著者:赖利
题名:乐享Arduino+Android+PC创意制作
年:2014
应还日期:20141202
应还时间:22:00
归还日期:20141124
归还时间:19:10
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.6
著者:威尔弗雷德
题名:PHP专业项目实例开发
年:2003
应还日期:20141115
应还时间:22:00
归还日期:20141108
归还时间:09:51
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.7
著者:潘凯华
题名:PHP求职宝典
年:2012
应还日期:20141114
应还时间:22:00
归还日期:20141108
归还时间:09:51
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.8
著者:赖利
题名:乐享Arduino+Android+PC创意制作
年:2014
应还日期:20141020
应还时间:22:00
归还日期:20141012
归还时间:15:14
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.9
著者:叶青
题名:网页开发手记 :HTML+CSS+JavaScript实战详解
年:2011
应还日期:20141020
应还时间:22:00
归还日期:20141012
归还时间:15:15
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.10
著者:叶青
题名:网页开发手记 :HTML+CSS+JavaScript实战详解
年:2011
应还日期:20140929
应还时间:22:00
归还日期:20140920
归还时间:18:31
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.11
著者:赖利
题名:乐享Arduino+Android+PC创意制作
年:2014
应还日期:20140929
应还时间:22:00
归还日期:20140920
归还时间:18:31
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.12
著者:刘剑
题名:51单片机开发与应用基础教程 :C语言版
年:2012
应还日期:20140911
应还时间:22:00
归还日期:20140701
归还时间:14:16
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.13
著者:熊斌
题名:Android多媒体开发技术实战详解
年:2012
应还日期:20140911
应还时间:22:00
归还日期:20140830
归还时间:19:00
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.14
著者:贝内特
题名:Objective-C初学者指南
年:2012
应还日期:20140911
应还时间:22:00
归还日期:20140830
归还时间:19:01
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.15
著者:张海霞
题名:奇思妙想的物联网 :2012年中国大学生物联网创新创业大赛获奖作品集锦
年:2013
应还日期:20140911
应还时间:22:00
归还日期:20140830
归还时间:19:01
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.16
著者:赖利
题名:乐享Arduino+Android+PC创意制作
年:2014
应还日期:20140911
应还时间:22:00
归还日期:20140830
归还时间:19:02
罚款:
分馆:总馆图书借阅区A2-A5
-----------------------------------
No.17
著者:叶青
题名:网页开发手记 :HTML+CSS+JavaScript实战详解
年:2011
应还日期:20140911
应还时间:22:00
归还日期:20140830
归还时间:19:02
罚款:
分馆:总馆图书借阅区A2-A5
------------您的续借结果------------
序号:1
描述:JSP程序设计教程
单册状态:已借出
应还日期:20150311
应还时间:22:00
分馆:总馆图书借阅区A2-A5
条码:101101630602
单册描述:
未能续借的原因:不能再续借 (还书日期没改变)。
我把代码中的HttpUtil.java也贴出来:
packagecom.wdyx.weixin.service;
importjava.io.BufferedReader;
importjava.io.IOException;
importjava.io.InputStream;
importjava.io.InputStreamReader;
importjava.net.HttpURLConnection;
importjava.net.URL;
import org.apache.http.HttpEntity;
importorg.apache.http.HttpResponse;
importorg.apache.http.client.ClientProtocolException;
importorg.apache.http.client.methods.HttpGet;
importorg.apache.http.entity.BufferedHttpEntity;
importorg.apache.http.impl.client.CloseableHttpClient;
importorg.apache.http.impl.client.HttpClients;
importorg.apache.http.util.EntityUtils;
/**
* HTTP小工具
* @author 帮杰
*
*/
public classHttpUtil {
/**
* 判断是否有重定向
* @param response
* @return boolean
*/
public static boolean isRedirected(HttpResponseresponse){
int code =response.getStatusLine().getStatusCode();
return(code==301||code==302)?true:false;
}
/**
* 得到重定向Location
* @param response
* @return String
*/
public static StringgetLocation(HttpResponse response){
returnresponse.getHeaders("Location")[0].toString();
}
/**
* 处理重定向
* @param response
* @param httpclient
* @return HttpResponse
*/
public static HttpResponsehandleRedirect(HttpResponse response,CloseableHttpClient httpclient){
if(isRedirected(response)){
try {
Stringlocation = getLocation(response);
HttpGethttpget = new HttpGet(location);
response =httpclient.execute(httpget);
} catch(ClientProtocolException e) {
// TODOAuto-generated catch block
e.printStackTrace();
} catch (IOExceptione) {
// TODOAuto-generated catch block
e.printStackTrace();
}
}
return response;
}
/**
* 新请求一个页面
*
* @param url
* @return HTML
* @throws IOException
* @throws ClientProtocolException
* @return String
*/
public static String getHtml(Stringurl) throws ClientProtocolException, IOException {
String html = "";
HttpGet httpget = newHttpGet(url);
HttpResponse response =HttpClients.createDefault().execute(httpget);
HttpEntity entity = newBufferedHttpEntity(response.getEntity());
String charset =HtmlUtil.getCharset(EntityUtils.toString(entity));
html =EntityUtils.toString(entity,charset);
return html;
}
/**
* 新请求一个页面,自动处理重定向
*
* @param url
* @return HTML
* @throws IOException
* @throws ClientProtocolException
* @return String
*/
public static String getHtml3xx(Stringurl) throws ClientProtocolException, IOException {
String html = "";
HttpGet httpget = newHttpGet(url);
CloseableHttpClienthttpclient = HttpClients.createDefault();
HttpResponse response =handleRedirect(httpclient.execute(httpget),httpclient);
HttpEntity entity = newBufferedHttpEntity(response.getEntity());
String charset =HtmlUtil.getCharset(EntityUtils.toString(entity));
html =EntityUtils.toString(entity,charset);
return html;
}
/**
* 指定client请求页面
*
* @param url
* @param httpclient
* @return HTML
* @throws IOException
* @throws ClientProtocolException
* @return String
*/
public static String getHtml(Stringurl,CloseableHttpClient httpclient) throws ClientProtocolException, IOException{
String html = "";
HttpGet httpget = newHttpGet(url);
HttpResponse response =httpclient.execute(httpget);
HttpEntity entity = newBufferedHttpEntity(response.getEntity());
String charset =HtmlUtil.getCharset(EntityUtils.toString(entity));
html =EntityUtils.toString(entity,charset);
return html;
}
/**
* 指定client请求页面,自动处理重定向
*
* @param url
* @param httpclient
* @return HTML
* @throws IOException
* @throws ClientProtocolException
* @return String
*/
public static String getHtml3xx(Stringurl,CloseableHttpClient httpclient) throws ClientProtocolException, IOException{
Stringhtml = "";
HttpGet httpget = newHttpGet(url);
HttpResponse response =handleRedirect(httpclient.execute(httpget),httpclient);
HttpEntity entity = newBufferedHttpEntity(response.getEntity());
String charset =HtmlUtil.getCharset(EntityUtils.toString(entity));
html =EntityUtils.toString(entity,charset);
return html;
}
/**
* 另一种发起http get请求获取网页源代码 的方法
* 有时候会快一些
*
* @param requestUrl
* @return String
*/
public static String httpRequest(StringrequestUrl,String charset) {
StringBuffer buffer = null;
try {
// 建立连接
URL url = new URL(requestUrl);
HttpURLConnection httpUrlConn =(HttpURLConnection) url.openConnection();
httpUrlConn.setDoInput(true);
httpUrlConn.setRequestMethod("GET");
// 获取输入流
InputStream inputStream =httpUrlConn.getInputStream();
InputStreamReader inputStreamReader= new InputStreamReader(inputStream, charset);
BufferedReader bufferedReader = newBufferedReader(inputStreamReader);
// 读取返回结果
buffer = new StringBuffer();
String str = null;
while ((str = bufferedReader.readLine())!= null) {
buffer.append(str);
}
// 释放资源
bufferedReader.close();
inputStreamReader.close();
inputStream.close();
httpUrlConn.disconnect();
} catch (Exception e) {
e.printStackTrace();
}
return buffer.toString();
}
}
还有HtmlUtil.java
packagecom.wdyx.weixin.service;
importjava.io.IOException;
importjava.net.MalformedURLException;
importjava.net.URL;
importjava.util.ArrayList;
importjava.util.HashMap;
importjava.util.List;
importorg.apache.http.client.ClientProtocolException;
importorg.jsoup.Jsoup;
importorg.jsoup.nodes.Document;
importorg.jsoup.nodes.Element;
importorg.jsoup.select.Elements;
/**
* HTML小工具
* @author 帮杰
*
*/
public classHtmlUtil {
private static final String GBK ="GBK";
private static final String GB_2312 ="GB2312";
private static final String UTF_8 ="UTF-8";
private static final String UTF_16 ="UTF-16";
private static final String ISO_8859_1= "ISO-8859-1";
private static final String ISO_8859_2= "ISO-8859-2";
private static final String ISO_8859_3= "ISO-8859-3";
private static final String ISO_8859_4= "ISO-8859-4";
private static final String ISO_8859_5= "ISO-8859-5";
private static final String ISO_8859_6= "ISO-8859-6";
private static final String ISO_8859_7= "ISO-8859-7";
private static final String ISO_8859_8= "ISO-8859-8";
private static final String ISO_8859_9= "ISO-8859-9";
private static final String ISO_8859_10= "ISO-8859-10";
private static final String ISO_8859_15= "ISO-8859-15";
private static final String ISO_2022_JP= "ISO-2022-JP";
private static final StringISO_2022_JP_2 = "ISO-2022-JP-2";
private static final String ISO_2022_KR= "ISO-2022-KR";
/**
* 列举所有字符编码
* @return List<String>
*/
private static List<String>getCharsetList(){
List<String>charsetList = new ArrayList<String>();
charsetList.add(GBK);
charsetList.add(GB_2312);
charsetList.add(UTF_8);
charsetList.add(UTF_16);
charsetList.add(ISO_8859_1);
charsetList.add(ISO_8859_2);
charsetList.add(ISO_8859_3);
charsetList.add(ISO_8859_4);
charsetList.add(ISO_8859_5);
charsetList.add(ISO_8859_6);
charsetList.add(ISO_8859_7);
charsetList.add(ISO_8859_8);
charsetList.add(ISO_8859_9);
charsetList.add(ISO_8859_10);
charsetList.add(ISO_8859_15);
charsetList.add(ISO_2022_JP);
charsetList.add(ISO_2022_JP_2);
charsetList.add(ISO_2022_KR);
return charsetList;
}
/**
* 获取HTML页面的字符集
* @param html
* @return
*/
public static String getCharset(Stringhtml){
String charSet ="utf-8";
try{
Document doc =Jsoup.parse(html);
String content =doc.getElementsByTag("meta").select("[http-equiv=Content-Type]").attr("content");
List<String>charSetList = getCharsetList();
if(content != null){
for(Stringcharset : charSetList){
if(content.contains(charset)||content.contains(charset.toLowerCase())){
charSet= charset;
break;
}
}
}
}finally{
}
return charSet;
}
/**
* 获取HTML页面的所有链接
* @param html
* @return HTML页面的所有链接
*/
public static List<String>getLinks(String html){
List<String> links =new ArrayList<String>();
try{
Document doc =Jsoup.parse(html);
Elements elements =doc.select("a[href]");
for(Element element: elements){
links.add(element.attr("href"));
}
}catch(Exception e){
e.printStackTrace();
}
return links;
}
/**
* 获取HTML页面的所有资源地址
* @param html
* @return HTML页面的所有资源地址
*/
public static List<String>getSrc(String html){
List<String> src = newArrayList<String>();
try{
Document doc =Jsoup.parse(html);
Elements elements =doc.select("[src]");
for(Element element: elements){
src.add(element.attr("src"));
}
}catch(Exception e){
e.printStackTrace();
}
return src;
}
/**
* 获取HTML登录页面的action地址
* @param login_url
* @return String
*/
public static String getAction(Stringlogin_url){
String action = "";
try {
Document doc =Jsoup.parse(new URL(login_url), 5000) ;
Elements elements =doc.getElementsByTag("form");
Element element =elements.select("[method=post]").first();
action =element.attr("action");
} catch (MalformedURLExceptione) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return action;
}
/**
* 获取登录表单input内容
*
* @param url
* @return HashMap<String, String>
* @throws IOException
* @throws ClientProtocolException
*/
public static HashMap<String, String>getLoginFormData(String login_url) throws ClientProtocolException, IOException{
Document document = Jsoup.parse(newURL(login_url), 5000) ;
Elements elements =document.getElementsByTag("form").select("[method=post]").first().select("input[name]");
HashMap<String, String> parmas =new HashMap<String, String>();
for (Element temp : elements) {
parmas.put(temp.attr("name"),temp.attr("value"));
}
return parmas;
}
}
有了获得相关信息的方法后,再和微信的消息机制整合,就可以实现在微信平台上的数字图书馆服务,具体整合方式此处不再赘述。
此处虽以武大图书馆为例,但其方法是通用的。其他学校的朋友如果想做这项服务的话只需修改少量代码即可复用。
好了,今天就到此为止了,如果有盆友有更好的思路或改进方法,请联系我,我们共同学习和探讨。我的邮箱是:mobangjack@foxmail.com
相关文章推荐
- 微信公共服务平台开发(.Net 的实现)2-------获得ACCESSTOKEN
- 微信公共服务平台开发(.Net 的实现)4-------语音识别
- 微信公共服务平台开发(.Net 的实现)3-------发送文本消息
- 微信公共服务平台开发(.Net 的实现)8-------处理图片(上传下载发送)
- 微信公共服务平台开发(.Net 的实现)7-------发送图文消息
- 微信客户服务最难的不是功能设计和开发,而是对微信客户服务运营的理解
- 微信公共服务平台开发(.Net 的实现)12-------网页授权(上 :更加深入理解OAuth2.0 )
- 微信公共服务平台开发(.Net 的实现)1-------认证“成为开发者”
- 微信公共账号开发(未认证服务号)
- 微信公共服务平台开发(.Net 的实现)10-------地理位置
- 微信公共服务平台开发(.Net 的实现)5-------解决access_token过期的问题
- 微信公共服务平台开发(.Net 的实现)10-------地理位置
- 微信公共服务平台开发(.Net 的实现)9-------处理二维码
- 微信公共服务平台开发(.Net 的实现)2-------获得ACCESSTOKEN
- 微信的高级接口(通过微信认证后自动获得)可以做什么?微信服务号定制开发能为企业带来什么?
- 微信公共服务平台开发(.Net 的实现)3-------发送文本消息
- 微信公共服务平台开发(.Net 的实现)8-------处理图片(上传下载发送)
- 微信公共服务平台开发(.Net 的实现)12-------网页授权(上 :更加深入理解OAuth2.0 )
- 微信公共服务平台开发(.Net 的实现)5-------解决access_token过期的问题
- 微信公共服务平台开发(.Net 的实现)11-------客服消息(定项消息推送 重要的OPENID)