您的位置:首页 > 其它

“抓取”

2016-01-24 23:26 429 查看
1.数据库表格的建立;
2.利用“抓取”模板;
3.添加引用:程序包:Jumony、Json.NET(Newtonsoft.Json);项目:WebBot 中的 dll 文件,位于 bin 目录下。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Skay.WebBot;
using Ivony.Html;
using Ivony.Html.Parser;
using System.Threading;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System.Data.SqlClient;
namespace 抓取
{
    /// <summary>
    /// Console scraper: downloads one product-list page from www.htluxe.com,
    /// follows each product link to its detail page, and stores one row per
    /// product in the <c>milk</c> table of the configured SQL Server database.
    /// </summary>
    class Program
    {
        // Single source of truth for the database location (previously duplicated inline).
        private const string ConnectionString = "Data Source=PC-201511281446;Initial Catalog=数据;Integrated Security=True";

        // Product links on the list page are relative; they are appended to this root.
        private const string SiteRoot = "http://www.htluxe.com/";

        private const string ListUrl = "http://www.htluxe.com/category.php?category=63&display=list&brand=0&price_min=0&price_max=0&filter_attr=0&page=1&sort=goods_id&order=DESC#goods_list";

        /// <summary>Worker thread that runs <see cref="GetJDData"/>.</summary>
        public static Thread th;

        /// <summary>
        /// Entry point: opens a database connection, starts the scraping thread,
        /// then closes the connection again.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // NOTE(review): this connection is not used for any query; opening it only
            // makes the program throw up front when the database cannot be reached.
            using (SqlConnection conn = new SqlConnection(ConnectionString))
            {
                conn.Open();
                th = new Thread(new ThreadStart(GetJDData));
                th.Start();
            }
        }

        /// <summary>
        /// Scrapes the product list page and, for every <c>.piclist li</c> entry,
        /// reads the list-level fields, fetches the detail page for the product
        /// code and rating-tab texts, and inserts the result into the database.
        /// Waits for a key press once all items have been processed.
        /// </summary>
        private static void GetJDData()
        {
            HttpUtility http = new HttpUtility();
            string html = http.GetHtmlText(ListUrl);
            var listDocument = new JumonyParser().Parse(html);
            var items = listDocument.Find(".piclist li");
            // items.Count() would give the total number of products on the page.

            foreach (var item in items)
            {
                // The same anchor carries both the title text and the detail link.
                var link = item.FindFirst(".base a");
                string title = link.InnerText();
                string min_price = item.FindFirst(".base .price .minprice").InnerText();
                string max_price = item.FindFirst(".base .price .maxprice").InnerText();
                // The href must be made absolute before it can be passed to http.GetHtmlText().
                string goodsurl = SiteRoot + link.Attribute("href").Value();
                string visitsum = item.FindFirst(".sum .ratecount strong").InnerText();
                string salesum = item.FindFirst(".sum .soldnum strong").InnerText();

                Console.WriteLine(title);

                // Fetch and parse the product detail page.
                string detailHtml = http.GetHtmlText(goodsurl, "utf-8", "text/html; charset=utf-8", "");
                var detailDocument = new JumonyParser().Parse(detailHtml);

                // If several .promotionMiddleTop blocks exist, the last one wins.
                string Code = "";
                foreach (var block in detailDocument.Find(".promotionMiddleTop"))
                {
                    Code = block.FindFirst(".bh").InnerText();
                }

                // The first tab is skipped; tabs 2-4 supply the three rating texts.
                // NOTE(review): presumably the first tab is an "all comments" entry — confirm against the page.
                string Bestping = "";
                string Goodping = "";
                string Badping = "";
                int position = 0;
                foreach (var tab in detailDocument.Find(".comment-tab .clearfix li"))
                {
                    switch (position)
                    {
                        case 1:
                            Bestping = tab.FindFirst("a").InnerText();
                            break;
                        case 2:
                            Goodping = tab.FindFirst("a").InnerText();
                            break;
                        case 3:
                            Badping = tab.FindFirst("a").InnerText();
                            break;
                    }
                    position++;
                }

                SaveRow(title, Code, min_price, max_price, salesum, visitsum, Bestping, Goodping, Badping);
            }

            Console.ReadKey();
        }

        /// <summary>
        /// Inserts one scraped product into the <c>milk</c> table. Values are bound
        /// positionally, in the same column order the table is declared with.
        /// </summary>
        private static void SaveRow(string title, string code, string minPrice, string maxPrice,
            string saleSum, string visitSum, string bestPing, string goodPing, string badPing)
        {
            // Scraped text is untrusted: bind it as parameters instead of splicing it into
            // the SQL, so quotes in a title can neither break nor alter the statement.
            const string sql = "insert into milk values (@title, @code, @minPrice, @maxPrice, @saleSum, @visitSum, @bestPing, @goodPing, @badPing)";

            using (SqlConnection conn = new SqlConnection(ConnectionString))
            using (SqlCommand cmd = new SqlCommand(sql, conn))
            {
                // A null would previously have been formatted as an empty string; keep that.
                cmd.Parameters.AddWithValue("@title", title ?? string.Empty);
                cmd.Parameters.AddWithValue("@code", code ?? string.Empty);
                cmd.Parameters.AddWithValue("@minPrice", minPrice ?? string.Empty);
                cmd.Parameters.AddWithValue("@maxPrice", maxPrice ?? string.Empty);
                cmd.Parameters.AddWithValue("@saleSum", saleSum ?? string.Empty);
                cmd.Parameters.AddWithValue("@visitSum", visitSum ?? string.Empty);
                cmd.Parameters.AddWithValue("@bestPing", bestPing ?? string.Empty);
                cmd.Parameters.AddWithValue("@goodPing", goodPing ?? string.Empty);
                cmd.Parameters.AddWithValue("@badPing", badPing ?? string.Empty);

                // Open only for the insert so the connection is not held during HTTP fetches.
                conn.Open();
                cmd.ExecuteNonQuery();
            }
        }
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: