您的位置:首页 > 其它

网页数据抓取之新浪新闻数据

2017-01-22 16:27 176 查看
这里根据自己的需求抓取了部分数据,可以根据实际需要做相应改动:

package com.atman.baiye.store.utils;

import java.net.URLDecoder;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;

import javapns.json.JSONArray;
import javapns.json.JSONException;
import javapns.json.JSONObject;

import com.atman.baiye.store.domain.AiCommonInfo;

/**
 * Queries the Sina news search API for a keyword and converts the returned
 * JSON records into {@link AiCommonInfo} objects.
 *
 * <p>All methods are static; the class holds no state.
 *
 * @author dwzhou@atman.com
 * @since 2017-01-19
 */
public class GoodsXinLangNewsUtils {

    /** Base URL of the Sina news search API; the URL-encoded keyword is appended to it. */
    public static final String XINLANG_URL = "http://api.search.sina.com.cn/?c=news&t=&q=";

    /** Charset used to URL-encode the keyword; must agree with the {@code ie=utf-8} query parameter. */
    private static final String QUERY_CHARSET = "UTF-8";

    /** Fixed query options appended after the keyword. */
    // NOTE(review): an earlier revision also sketched "&stime=...&etime=..." date-range
    // parameters here; presumably the API accepts them - confirm before re-adding.
    private static final String QUERY_OPTIONS = "&sort=rel&highlight=1&num=10&ie=utf-8";

    /** Value passed to {@code AiCommonInfo.setType}; meaning is defined by the caller's domain model. */
    private static final int INFO_TYPE = 1009;

    /** Value passed to {@code AiCommonInfo.setSource}; meaning is defined by the caller's domain model. */
    private static final int INFO_SOURCE = 4;

    /**
     * Runs a news search for {@code keyword} and returns the {@code result.list} array
     * of the JSON response.
     *
     * @param keyword search term; it is URL-encoded as UTF-8 before being sent
     * @param charset charset name handed to {@code WebHttpClient.getBebContentByURL}
     * @return the {@code list} array of the response, or an empty array when the keyword
     *         is {@code null}, the response is empty, or the response cannot be parsed
     */
    public static JSONArray getArrayData(String keyword, String charset) {
        JSONArray jsonarray = new JSONArray();
        if (keyword == null) {
            return jsonarray;
        }
        try {
            // Encode explicitly as UTF-8: the one-argument URLEncoder.encode is deprecated
            // and uses the platform default charset, which would contradict "ie=utf-8".
            String url = XINLANG_URL + URLEncoder.encode(keyword, QUERY_CHARSET) + QUERY_OPTIONS;
            System.out.println("url:" + url);

            String data = WebHttpClient.getBebContentByURL(url, "", false, charset);
            if (data == null || data.trim().isEmpty()) {
                System.err.println("empty response for url:" + url);
                return jsonarray;
            }

            JSONObject jsondata = new JSONObject(data);
            String result = jsondata.get("result").toString();
            System.out.println(result);
            jsondata = new JSONObject(result);
            jsonarray = new JSONArray(jsondata.get("list").toString());
            System.out.println("找到相关新闻" + jsonarray.length() + "篇");
        } catch (JSONException e) {
            // Malformed or unexpected response: report it and fall through to the
            // array built so far (empty unless parsing of "list" succeeded).
            e.printStackTrace();
        } catch (java.io.UnsupportedEncodingException e) {
            // UTF-8 is a charset every Java platform is required to support.
            throw new IllegalStateException(QUERY_CHARSET + " is not supported", e);
        }
        return jsonarray;
    }

    /**
     * Converts each JSON news record in {@code array} into an {@link AiCommonInfo}.
     *
     * <p>A record that lacks one of the required fields ({@code origin_title},
     * {@code url}, {@code media}) is reported and skipped; the remaining records
     * are still converted.
     *
     * @param array   news records as returned by {@link #getArrayData(String, String)};
     *                {@code null} is treated as empty
     * @param keyword keyword stored on every produced record
     * @return the converted records, never {@code null}
     */
    public static List<AiCommonInfo> getGoodsInfoList(JSONArray array, String keyword) {
        List<AiCommonInfo> list = new ArrayList<AiCommonInfo>();
        if (array == null) {
            return list;
        }
        for (int i = 0; i < array.length(); i++) {
            try {
                list.add(toCommonInfo(array.getJSONObject(i), keyword));
            } catch (JSONException e) {
                // One malformed record must not discard the records that follow it.
                e.printStackTrace();
            }
        }
        return list;
    }

    /** Builds one {@link AiCommonInfo} from a single news record, echoing its fields to stdout. */
    private static AiCommonInfo toCommonInfo(JSONObject json, String keyword) throws JSONException {
        String title = json.get("origin_title").toString();
        System.out.println("title:" + title);

        String detailUrl = json.get("url").toString();
        System.out.println("detail_url:" + detailUrl);

        String picUrl = stringOrEmpty(json, "imgurl");
        System.out.println("pic_url:" + picUrl);

        String storeName = json.get("media").toString();
        System.out.println("store_name:" + storeName);

        // "intro" and "datetime" are only echoed, never stored, so they are optional.
        System.out.println("intro:" + stringOrEmpty(json, "intro"));
        System.out.println("datetime:" + stringOrEmpty(json, "datetime"));

        AiCommonInfo aiCommonInfo = new AiCommonInfo();
        aiCommonInfo.setType(INFO_TYPE);
        aiCommonInfo.setKeyword(keyword);
        aiCommonInfo.setSource(INFO_SOURCE);
        aiCommonInfo.setDetailUrl(detailUrl);
        aiCommonInfo.setPicUrl(picUrl);
        aiCommonInfo.setStoreName(storeName);
        aiCommonInfo.setTitle(title);
        return aiCommonInfo;
    }

    /** Returns the string form of {@code key}'s value, or {@code ""} when the key is absent. */
    private static String stringOrEmpty(JSONObject json, String key) throws JSONException {
        return json.has(key) ? json.get(key).toString() : "";
    }

    /** Demo entry point: searches for a sample keyword and prints the converted records. */
    public static void main(String[] args) {
        JSONArray data = getArrayData("新闻联播迎新主播", "utf-8");
        getGoodsInfoList(data, "新闻联播迎新主播");
    }
}









                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: