多线程数据采集
2008-08-23 17:05
281 查看
/// <summary>
/// Click handler that starts two worker threads running <see cref="GatherProduct"/>,
/// each with its own [start, end) range of page numbers.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    Thread firstTread = new Thread(new ParameterizedThreadStart(GatherProduct));
    Thread secondTread = new Thread(new ParameterizedThreadStart(GatherProduct));

    // Range for the first worker: pages 200..299 (GatherProduct reads [0] as
    // the inclusive start and [1] as the exclusive end).
    ArrayList arr1 = new ArrayList();
    arr1.Add(200);
    arr1.Add(300);

    // Range for the second worker: pages 300..399. The bounds must go into
    // arr2 (not arr1), and arr2 must be handed to the second thread; otherwise
    // both workers process the same pages and 300..399 is never collected.
    ArrayList arr2 = new ArrayList();
    arr2.Add(300);
    arr2.Add(400);

    firstTread.Start(arr1);
    secondTread.Start(arr2);
}
/// <summary>
/// Thread entry point: downloads every page in the given range, extracts
/// product and company links with regular expressions, and stores one
/// record per product/company pair.
/// </summary>
/// <param name="obj">
/// An <see cref="ArrayList"/> whose element 0 is the first page number
/// (inclusive) and element 1 is the last page number (exclusive).
/// </param>
/// <remarks>
/// Nothing is caught here, so an exception from the download, the regex
/// matching or the storage call propagates out of the thread.
/// </remarks>
public void GatherProduct(object obj)
{
    ArrayList arr = (ArrayList)obj;
    int firstPage = Convert.ToInt32(arr[0]);
    int endPage = Convert.ToInt32(arr[1]);

    // The patterns do not depend on the page, so build them once.
    // Quotes are escaped as \" and regex whitespace as \\s.
    const string sPattern = "href=\"(?<ProductURL>[^>]*)\"\\sclass=black2\\starget=_blank><strong>(?<ProductName>[^<]*)</strong>";
    const string sPattern1 = "<TD\\sheight=\"20\"\\sbgcolor=\"f8f8f8\"\\sclass=\"black12\"><a\\shref=\"(?<CompanyURL>[^>]*)\"\\sclass=orange>(?<CompanyName>[^<]*)</a></TD>";
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    for (int i = firstPage; i < endPage; i++)
    {
        string sHtml = utility.GetPageHTML("url" + i + ".html");
        Thread.Sleep(10);

        MatchCollection matchs = Regex.Matches(sHtml, sPattern, options);
        MatchCollection matchs1 = Regex.Matches(sHtml, sPattern1, options);

        // Products and companies are paired by position; only as many rows
        // as the shorter of the two match lists can be paired.
        int pairCount = Math.Min(matchs.Count, matchs1.Count);
        for (int j = 0; j < pairCount; j++)
        {
            Model.pharmnetProduct productModel = new Model.pharmnetProduct();
            productModel.ProductName = matchs[j].Groups["ProductName"].Value;
            productModel.ProductURL = matchs[j].Groups["ProductURL"].Value;
            productModel.CompanyName = matchs1[j].Groups["CompanyName"].Value;
            productModel.CompanyURL = matchs1[j].Groups["CompanyURL"].Value;
            new BLL.pharmnetProduct().Add(productModel);
        }
    }
}
/// <summary>
/// Downloads the page at <paramref name="url"/> with an HTTP GET request
/// and returns the response body as text.
/// </summary>
/// <param name="url">Absolute http/https URL of the page to fetch.</param>
/// <returns>The response body decoded with <see cref="Encoding.Default"/>.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="url"/> does not produce an <see cref="HttpWebRequest"/>
/// (for example a file: or ftp: URL).
/// </exception>
/// <remarks>
/// Exceptions raised while creating or executing the request are not caught
/// and reach the caller with their original stack trace.
/// </remarks>
public static string GetPageHTML(string url)
{
    HttpWebRequest wr = WebRequest.Create(url) as HttpWebRequest;
    if (wr == null)
    {
        throw new ArgumentException("The URL must use the http or https scheme.", "url");
    }

    wr.Method = "GET";
    wr.Accept = "*/*";
    wr.Headers.Add("Accept-Language: zh-cn");
    wr.Headers.Add("UA-CPU: x86");
    // Let the framework send Accept-Encoding and decompress the body itself;
    // advertising gzip/deflate by hand would return compressed bytes as text.
    wr.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    wr.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Embedded Web Browser from: http://bsalsa.com/; InfoPath.2; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
    wr.KeepAlive = true;
    wr.ServicePoint.Expect100Continue = false;
    wr.AllowAutoRedirect = false;

    // using blocks release the connection and reader even if reading fails.
    using (HttpWebResponse wre = (HttpWebResponse)wr.GetResponse())
    using (StreamReader sreader = new StreamReader(wre.GetResponseStream(), Encoding.Default))
    {
        // NOTE(review): Encoding.Default is the machine's ANSI code page;
        // confirm it matches the charset of the target site.
        return sreader.ReadToEnd();
    }
}
{
    // Starts two worker threads running GatherProduct, each with its own
    // [start, end) range of page numbers.
    Thread firstTread = new Thread(new ParameterizedThreadStart(GatherProduct));
    Thread secondTread = new Thread(new ParameterizedThreadStart(GatherProduct));

    // Range for the first worker: pages 200..299.
    ArrayList arr1 = new ArrayList();
    arr1.Add(200);
    arr1.Add(300);

    // Range for the second worker: pages 300..399. The bounds must go into
    // arr2 and arr2 must be passed to the second thread; otherwise both
    // workers process the same pages.
    ArrayList arr2 = new ArrayList();
    arr2.Add(300);
    arr2.Add(400);

    firstTread.Start(arr1);
    secondTread.Start(arr2);
}
/// <summary>
/// Thread entry point: downloads every page in the given range, extracts
/// product and company links with regular expressions, and stores one
/// record per product/company pair.
/// </summary>
/// <param name="obj">
/// An <see cref="ArrayList"/> whose element 0 is the first page number
/// (inclusive) and element 1 is the last page number (exclusive).
/// </param>
/// <remarks>
/// Nothing is caught here, so an exception from the download, the regex
/// matching or the storage call propagates out of the thread.
/// </remarks>
public void GatherProduct(object obj)
{
    ArrayList arr = (ArrayList)obj;
    int firstPage = Convert.ToInt32(arr[0]);
    int endPage = Convert.ToInt32(arr[1]);

    // The patterns do not depend on the page, so build them once.
    // Quotes are escaped as \" and regex whitespace as \\s.
    const string sPattern = "href=\"(?<ProductURL>[^>]*)\"\\sclass=black2\\starget=_blank><strong>(?<ProductName>[^<]*)</strong>";
    const string sPattern1 = "<TD\\sheight=\"20\"\\sbgcolor=\"f8f8f8\"\\sclass=\"black12\"><a\\shref=\"(?<CompanyURL>[^>]*)\"\\sclass=orange>(?<CompanyName>[^<]*)</a></TD>";
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    for (int i = firstPage; i < endPage; i++)
    {
        string sHtml = utility.GetPageHTML("url" + i + ".html");
        Thread.Sleep(10);

        MatchCollection matchs = Regex.Matches(sHtml, sPattern, options);
        MatchCollection matchs1 = Regex.Matches(sHtml, sPattern1, options);

        // Products and companies are paired by position; only as many rows
        // as the shorter of the two match lists can be paired.
        int pairCount = Math.Min(matchs.Count, matchs1.Count);
        for (int j = 0; j < pairCount; j++)
        {
            Model.pharmnetProduct productModel = new Model.pharmnetProduct();
            productModel.ProductName = matchs[j].Groups["ProductName"].Value;
            productModel.ProductURL = matchs[j].Groups["ProductURL"].Value;
            productModel.CompanyName = matchs1[j].Groups["CompanyName"].Value;
            productModel.CompanyURL = matchs1[j].Groups["CompanyURL"].Value;
            new BLL.pharmnetProduct().Add(productModel);
        }
    }
}
/// <summary>
/// Downloads the page at <paramref name="url"/> with an HTTP GET request
/// and returns the response body as text.
/// </summary>
/// <param name="url">Absolute http/https URL of the page to fetch.</param>
/// <returns>The response body decoded with <see cref="Encoding.Default"/>.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="url"/> does not produce an <see cref="HttpWebRequest"/>
/// (for example a file: or ftp: URL).
/// </exception>
/// <remarks>
/// Exceptions raised while creating or executing the request are not caught
/// and reach the caller with their original stack trace.
/// </remarks>
public static string GetPageHTML(string url)
{
    HttpWebRequest wr = WebRequest.Create(url) as HttpWebRequest;
    if (wr == null)
    {
        throw new ArgumentException("The URL must use the http or https scheme.", "url");
    }

    wr.Method = "GET";
    wr.Accept = "*/*";
    wr.Headers.Add("Accept-Language: zh-cn");
    wr.Headers.Add("UA-CPU: x86");
    // Let the framework send Accept-Encoding and decompress the body itself;
    // advertising gzip/deflate by hand would return compressed bytes as text.
    wr.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    wr.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Embedded Web Browser from: http://bsalsa.com/; InfoPath.2; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
    wr.KeepAlive = true;
    wr.ServicePoint.Expect100Continue = false;
    wr.AllowAutoRedirect = false;

    // using blocks release the connection and reader even if reading fails.
    using (HttpWebResponse wre = (HttpWebResponse)wr.GetResponse())
    using (StreamReader sreader = new StreamReader(wre.GetResponseStream(), Encoding.Default))
    {
        // NOTE(review): Encoding.Default is the machine's ANSI code page;
        // confirm it matches the charset of the target site.
        return sreader.ReadToEnd();
    }
}
相关文章推荐
- 【多线程数据采集之三】java抓取数据+破解屏蔽ip访问 .
- python数据采集与多线程效率分析
- java多线程采集+线程同步-【多线程数据采集之四】
- Python3 多线程数据采集中的一些坑
- java抓取数据+破解屏蔽ip访问【多线程数据采集之三】
- java多线程网络数据采集 1
- php多线程采集网页数据-php采集网页-php爬虫视频教程8
- java采集网页数据方法【多线程数据采集之一】
- java+Jsoup 正则过滤html网页标签【多线程数据采集之二】
- java采集网页数据方法【多线程数据采集之一】
- 多线程模拟数据采集、显示
- java采集网页数据方法【多线程数据采集之一】
- java采集网页数据方法【多线程数据采集之一】
- java+Jsoup 正则过滤html网页标签【多线程数据采集之二】
- java+Jsoup 正则过滤html网页标签【多线程数据采集之二】
- java+Jsoup 正则过滤html网页标签【多线程数据采集之二】
- java抓取数据+破解屏蔽ip访问【多线程数据采集之三】
- java多线程采集+线程同步-【多线程数据采集之四】
- python数据采集与多线程效率分析
- 用Delphi在工业控制和自动化实现多线程进行数据采集