您的位置:首页 > 其它

多线程数据采集

2008-08-23 17:05 281 查看
/// <summary>
/// Click handler that starts two worker threads, each running
/// <c>GatherProduct</c> over its own range of page indices.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    // Each range is a two-element list: element 0 is the first page index,
    // element 1 is the exclusive upper bound (GatherProduct loops while i < arr[1]).
    ArrayList firstRange = new ArrayList();
    firstRange.Add(200);
    firstRange.Add(300);

    // The second worker needs its own list; previously these values were
    // appended to the first list and the first list was handed to both threads,
    // so pages 200-299 were gathered twice and 300-399 never.
    ArrayList secondRange = new ArrayList();
    secondRange.Add(300);
    secondRange.Add(400);

    Thread firstThread = new Thread(new ParameterizedThreadStart(GatherProduct));
    Thread secondThread = new Thread(new ParameterizedThreadStart(GatherProduct));

    firstThread.Start(firstRange);
    secondThread.Start(secondRange);
}
/// <summary>
/// Thread entry point: downloads each page in a range of page indices,
/// extracts product and company links with regular expressions, and stores
/// one record per matched pair.
/// </summary>
/// <param name="obj">
/// An <see cref="ArrayList"/> whose element 0 is the first page index and
/// whose element 1 is the exclusive upper bound. Typed as <c>object</c>
/// so the method can be used with <c>ParameterizedThreadStart</c>.
/// </param>
public void GatherProduct(object obj)
{
    ArrayList arr = (ArrayList)obj;

    // The bounds do not change while the loop runs, so parse them once.
    int firstPage = Convert.ToInt32(arr[0].ToString());
    int endPage = Convert.ToInt32(arr[1].ToString());

    // Quotes inside the patterns must be escaped as \" and the regex
    // whitespace class as \\s; the slashes that stood in their place
    // terminated the string literals early.
    const string productPattern = "href=\"(?<ProductURL>[^>]*)\"\\sclass=black2\\starget=_blank><strong>(?<ProductName>[^<]*)</strong>";
    const string companyPattern = "<TD\\sheight=\"20\"\\sbgcolor=\"f8f8f8\"\\sclass=\"black12\"><a\\shref=\"(?<CompanyURL>[^>]*)\"\\sclass=orange>(?<CompanyName>[^<]*)</a></TD>";
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    for (int i = firstPage; i < endPage; i++)
    {
        string sHtml = utility.GetPageHTML("url" + i + ".html");

        // Short pause between page downloads.
        Thread.Sleep(10);

        MatchCollection productMatches = Regex.Matches(sHtml, productPattern, options);
        MatchCollection companyMatches = Regex.Matches(sHtml, companyPattern, options);

        // Products and companies are paired by position, so only as many
        // records as the shorter of the two match lists can be built.
        int pairCount = Math.Min(productMatches.Count, companyMatches.Count);

        for (int j = 0; j < pairCount; j++)
        {
            Model.pharmnetProduct productModel = new Model.pharmnetProduct();
            productModel.ProductName = productMatches[j].Groups["ProductName"].Value;
            productModel.ProductURL = productMatches[j].Groups["ProductURL"].Value;
            productModel.CompanyName = companyMatches[j].Groups["CompanyName"].Value;
            productModel.CompanyURL = companyMatches[j].Groups["CompanyURL"].Value;

            // NOTE(review): presumably persists the record; BLL.pharmnetProduct
            // is defined outside this file - confirm.
            new BLL.pharmnetProduct().Add(productModel);
        }
    }
}
/// <summary>
/// Issues an HTTP GET for <paramref name="url"/> with browser-like headers
/// and returns the response body as text decoded with
/// <see cref="Encoding.Default"/>.
/// </summary>
/// <param name="url">Absolute HTTP or HTTPS address of the page to download.</param>
/// <returns>The full response body as a string.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="url"/> does not use a scheme handled by <see cref="HttpWebRequest"/>.
/// </exception>
/// <exception cref="WebException">The request failed or the server returned an error status.</exception>
public static string GetPageHTML(string url)
{
    HttpWebRequest wr = WebRequest.Create(url) as HttpWebRequest;
    if (wr == null)
    {
        // WebRequest.Create returns other request types for non-HTTP schemes
        // (e.g. file://); fail clearly instead of with a NullReferenceException.
        throw new ArgumentException("Only HTTP and HTTPS URLs are supported.", "url");
    }

    // HTTP method tokens are case-sensitive; use the canonical upper-case form.
    wr.Method = "GET";
    wr.Accept = "*/*";
    wr.Headers.Add("Accept-Language: zh-cn");
    wr.Headers.Add("UA-CPU: x86");

    // Let the framework advertise gzip/deflate and decode the body itself.
    // Sending the Accept-Encoding header by hand without this setting makes
    // a compressed response come back as unreadable bytes.
    wr.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

    wr.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Embedded Web Browser from: http://bsalsa.com/; InfoPath.2; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
    wr.KeepAlive = true;
    wr.ServicePoint.Expect100Continue = false;
    wr.AllowAutoRedirect = false;

    // The using blocks release the connection even when reading throws.
    // Exceptions propagate unchanged, so the original stack trace is kept.
    using (HttpWebResponse wre = (HttpWebResponse)wr.GetResponse())
    using (Stream body = wre.GetResponseStream())
    using (StreamReader sreader = new StreamReader(body, Encoding.Default))
    {
        return sreader.ReadToEnd();
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: