c#抓取网页(带解析js)
2018-02-06 18:29
1611 查看
抓取中国银行汇率 (phantomjs-1.9.2-windows + Selenium.WebDriver.3.8.0)
直接上代码
1.这个网站不是直接通过ajax请求数据,如果是这样通过等待就可以抓取到数据,该网站先是通过返回的js生成cookie,然后带上cookie访问动态地址,然后再生成cookie,带上所有的cookie,再去访问302,最后得到结果。具体看参考http://www.jianshu.com/p/11fac0596020
2.参考抓取获取cookies https://www.cnblogs.com/songxingzhu/p/7110723.html
3.获取里面的js变量 http://michaelthelin.se/javascript/testing/webdriver/2013/02/14/webdriver-reading-the-value-of-a-javascript-variable-spoiler-weirdness.html
1、进入搜索页面,得到js
2、htmlfile.write反混淆js,得到类似的两个函数function KTKY2RBD9NHPBCIHV9ZMEQQDARSLVFDU(str)和function QWERTASDFGXYSF()
3、运行这两个函数,得到两个cookie
cookieString = "wzwstemplate=" + KTKY2RBD9NHPBCIHV9ZMEQQDARSLVFDU(template.toString()) + "; path=/";
var confirm = QWERTASDFGXYSF();
cookieString = "wzwschallenge=" + KTKY2RBD9NHPBCIHV9ZMEQQDARSLVFDU(confirm.toString()) + "; path=/";
4、根据dynamicurl中的地址,带着三个Cookie: wzwsconfirm= wzwstemplate= wzwschallenge=
得到Cookie ccpassport=,和302跳转
5、带着4个cookie,经过两次302跳转,就可以进入search页面,获得JSESSIONID,后面就好办了
这个网站比较复杂。
2.收费:https://www.nrecosite.com/phantomjs_wrapper_net.aspx
3.抓取中行 http://xusheng.org/blog/2016/10/19/ru-he-zhua-qu-diao-cha-tong-ji-si-de-shu-ju/
4.Webdriver: Reading the value of a Javascript
variable (spoiler: weirdness):
http://michaelthelin.se/javascript/testing/webdriver/2013/02/14/webdriver-reading-the-value-of-a-javascript-variable-spoiler-weirdness.html
5. Python小记:selenium+PhantomJS爬虫解决页面js添加cookie
: https://www.jianshu.com/p/11fac0596020
直接上代码
using LTITools.util;
using OpenQA.Selenium;
using OpenQA.Selenium.PhantomJS;
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;

namespace LTITools
{
    /// <summary>
    /// Form that scrapes the exchange-rate announcements published on www.pbc.gov.cn with a
    /// headless PhantomJS browser and exports the month-end rates to Excel.
    /// Notes on the target site's anti-scraping flow:
    /// 1. The first request sets cookies in the response headers and returns obfuscated
    ///    JavaScript that has to be decoded and executed.
    /// 2. That script checks the width and height of the browser window.
    /// 3. After running, the script writes more cookies and navigates to a computed address.
    /// 4. The second address writes cookies again and answers with a 302 redirect.
    /// 5. Every later request must carry the cookies returned by the first two requests.
    /// </summary>
    public partial class ChinaBankRate : Form
    {
        /// <summary>Number of entries the site shows per list page.</summary>
        private const int PageSize = 20;

        /// <summary>Lower bound (milliseconds) for the pause between page loads.</summary>
        private const int MinPauseMilliseconds = 100;

        /// <summary>Matches the first decimal number (digits, a dot, optional digits) in a text segment.</summary>
        private static readonly Regex RateRegex = new Regex(@"\d+\.\d*");

        /// <summary>Source of the randomized pause lengths; only used from the scraping thread.</summary>
        private readonly Random _random = new Random();

        public ChinaBankRate()
        {
            InitializeComponent();
            InitData();
        }

        int _foreachPageCount = 1;
        string _url = "";
        int _totalCount = 1;
        int _totalPage = 0;
        int _stopMSec = 1000;
        IEnumerable<ChinaBankRateListItem> monthList;

        /// <summary>
        /// Fills the controls with their default values: list URL template, date pickers
        /// (month precision, nothing before 2015-08) and the usage notes.
        /// </summary>
        private void InitData()
        {
            txtUrl.Text = "http://www.pbc.gov.cn/zhengcehuobisi/125207/125217/125925/17105/index{0}.html";
            chkClearOldData.Checked = false;

            dtBeginDate.Text = DateTime.Now.AddMonths(-1).ToShortDateString();
            dtBeginDate.CustomFormat = "yyyy-MM";
            dtEndDate.CustomFormat = "yyyy-MM";
            dtBeginDate.Format = DateTimePickerFormat.Custom;
            dtEndDate.Format = DateTimePickerFormat.Custom;
            dtBeginDate.MinDate = Convert.ToDateTime("2015-8-1");
            dtEndDate.MinDate = Convert.ToDateTime("2015-8-1");
            dtEndDate.MaxDate = DateTime.Now;

            txtAbout.AppendText(" 1.请先通过[第一步,数据抓取]Tab进行抓取,抓取会遇到IP禁用、防抓取网络异常等," + "如有异常,可以进行多次抓取(注:抓取过程中会弹出黑框界面,抓取完成后会自动关闭);");
            txtAbout.AppendText("\n 2.抓取成功后,通过[第二步,数据导出]导出指定月份的数据(注:如果抓取过程中,则不能进行导出);");
            txtAbout.AppendText("\n 3.仅能导出指定当月日期最大的汇率数据(注:仅支持导出2015年8月以后的数据);");
        }

        /// <summary>Deletes the workbook that caches previously scraped data.</summary>
        private void ClearData()
        {
            File.Delete(GetExcelPath());
        }

        /// <summary>
        /// Starts a scraping run on a background thread.
        /// Pass one collects the announcement list, pass two loads each announcement's detail page.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        private void btnOk_Click(object sender, EventArgs e)
        {
            // Validate before touching any state so bad input cannot leave the buttons disabled.
            int pause;
            if (!int.TryParse(txtStop.Text.Trim(), out pause))
            {
                MessageBox.Show("暂停时间必须是整数(毫秒)");
                return;
            }

            btnOk.Enabled = false;
            btnExportData.Enabled = false;
            _url = txtUrl.Text.Trim();
            _stopMSec = pause < MinPauseMilliseconds ? MinPauseMilliseconds : pause;

            if (chkClearOldData.Checked)
            {
                ClearData();
            }

            Thread t = new Thread(new ThreadStart(DoWorkGetList));
            t.IsBackground = true;
            t.Start();
        }

        /// <summary>
        /// Currently a no-op. The disabled code below is an earlier attempt at solving the
        /// site's JavaScript cookie challenge by hand; it is kept for reference only.
        /// </summary>
        private void GotoURLAndCheckCookies(PhantomJSDriver driver, string url)
        {
            //var js = "var w= window.innerWidth||document.documentElement.clientWidth||document.body.clientWidth;var h= window.innerHeight||document.documentElement.clientHeight||document.body.clientHeight;"
            //    + "document.body.setAttribute(\"wh\", w*h);";
            //driver.ExecutePhantomJS(js);
            //var wh = driver.FindElement(By.TagName("body")).GetAttribute("wh");
            //if (driver.PageSource.Contains("dynamicurl"))
            //{
            //    driver.ExecuteScript("document.body.setAttribute(\"cookieString\", HXXTTKKLLPPP5);");
            //    var cookieString = driver.FindElement(By.TagName("body")).GetAttribute("cookieString");
            //    var newjs = cookieString.Replace("if(findDimensions())", "if(false)")
            //        .Replace("if(findDimensions())", "if(false)");
            //    driver.ExecutePhantomJS(newjs + " HXXTTKKLLPPP5();");
            //}
        }

        /// <summary>
        /// Background-thread entry point: scrapes the list pages, saves them, then scrapes the
        /// detail pages. The browser is always shut down and the buttons are always re-enabled,
        /// even when scraping fails.
        /// </summary>
        private void DoWorkGetList()
        {
            Action<String> AsynclblResultAsy = delegate(string text) { lblResult.Text = text; };
            Action<String> AsyncUIDelegateResult = delegate(string text) { txtResult.AppendText(text); };
            Action AsyncUIDelegateDone = delegate() { btnOk.Enabled = true; };
            Action AsyncUIDelegateExportDone = delegate { btnExportData.Enabled = true; };

            bool blret = false;
            PhantomJSDriver driver = null;
            try
            {
                txtResult.Invoke(AsyncUIDelegateResult, new object[] { DateTime.Now.ToShortTimeString() + "开始执行,正在抓取列表数据...\n" });

                driver = new PhantomJSDriver(GetPhantomJSDriverService());
                driver.Manage().Window.Size = new System.Drawing.Size() { Height = 800, Width = 600 };

                var ExistDatalists = GetExistData();
                CollectListItems(driver, ExistDatalists, AsyncUIDelegateResult);

                SaveDataToExcel(ExistDatalists.OrderByDescending(t => Convert.ToDateTime(t.Identifier)));
                txtResult.Invoke(AsyncUIDelegateResult, new object[] { "列表数据抓取完成!\n" });

                // Second pass: fetch the detail page of every entry that has no content yet.
                blret = DoWorkGetDetail(driver, AsyncUIDelegateResult, AsynclblResultAsy);
            }
            catch (Exception ex)
            {
                // Without this an exception would kill the worker thread, leak the PhantomJS
                // process and leave the UI permanently disabled.
                blret = false;
                txtResult.Invoke(AsyncUIDelegateResult, new object[] { "列表数据抓取异常" + ex + "\n" });
            }
            finally
            {
                try
                {
                    if (driver != null)
                    {
                        driver.Quit();
                    }
                }
                finally
                {
                    btnOk.Invoke(AsyncUIDelegateDone);
                    btnExportData.Invoke(AsyncUIDelegateExportDone);
                    lblResult.Invoke(AsynclblResultAsy, new object[] { string.Format("全部处理完成({0}),{1}", (blret ? "成功" : "有异常,请继续点击抓取开始"), DateTime.Now) });
                }
            }
        }

        /// <summary>
        /// Walks the list pages and appends every announcement that is not yet in
        /// <paramref name="existing"/>. Stops at the first already-known (or too old) entry,
        /// when a page yields nothing, or when all required pages were visited.
        /// </summary>
        /// <param name="driver">Browser used to load the pages.</param>
        /// <param name="existing">Previously saved entries; new entries are added to this list.</param>
        /// <param name="report">Delegate that appends a line to the result box (invoked on the UI thread).</param>
        private void CollectListItems(PhantomJSDriver driver, List<ChinaBankRateListItem> existing, Action<String> report)
        {
            var historyTotalCount = existing.Count;
            var earliestDate = Convert.ToDateTime("2015-8-1");
            var totalSelector = By.CssSelector("td[class='Normal']");
            var linkSelector = By.CssSelector("font[class='newslist_style'] > a");

            // Always start from page 1; a value below 1 left over from an earlier run would
            // otherwise skip the loop completely.
            _foreachPageCount = 1;

            for (var i = 1; i <= _foreachPageCount; i++)
            {
                var url = string.Format(_url, i);
                driver.Navigate().GoToUrl(url);

                // Random pause so the requests look less mechanical.
                int randKey = _random.Next(MinPauseMilliseconds, _stopMSec);
                Thread.Sleep(randKey);

                // The first page carries the total entry count, which determines the page count.
                if (i == 1)
                {
                    Thread.Sleep(_stopMSec);
                    var totalText = ReadElementText(driver, totalSelector);
                    if (string.IsNullOrEmpty(totalText))
                    {
                        txtResult.Invoke(report, new object[] { "totalinfos为空,抓取异常,请稍后试\n" });
                        return;
                    }

                    _totalCount = Convert.ToInt32(totalText.Split(',')[0].Split(':')[1]);
                    if (historyTotalCount == _totalCount)
                    {
                        // Nothing new has been published since the last run.
                        return;
                    }

                    // Only the pages that hold the not-yet-saved entries are needed (rounded up).
                    var missing = _totalCount - historyTotalCount;
                    _totalPage = missing / PageSize + (missing % PageSize != 0 ? 1 : 0);
                    _foreachPageCount = _totalPage;
                }

                // Stop when the current page is beyond the computed page count.
                if (i > _foreachPageCount)
                {
                    return;
                }

                // Announcement links on the current page; retry once after a pause.
                var lists = driver.FindElements(linkSelector);
                if (lists.Count == 0)
                {
                    Thread.Sleep(_stopMSec);
                    lists = driver.FindElements(linkSelector);
                }
                if (lists.Count == 0)
                {
                    txtResult.Invoke(report, new object[] { "lists为空,抓取异常,请稍后试\n" });
                    return;
                }

                var breakFlag = "";
                foreach (var item in lists)
                {
                    var title = item.Text;
                    // The link text starts with the date, followed by text beginning with '中'.
                    var identifierDate = title.Split('中')[0].Trim();

                    if (existing.Any(t => t.Identifier == identifierDate) || Convert.ToDateTime(identifierDate) <= earliestDate)
                    {
                        breakFlag = identifierDate;
                        break;
                    }

                    existing.Add(new ChinaBankRateListItem()
                    {
                        PIdentifier = i.ToString(),
                        Identifier = identifierDate,
                        Href = item.GetAttribute("href"),
                        Title = title,
                        IsSucess = "true",
                        HtmlContent = "",
                    });
                }

                if (!string.IsNullOrEmpty(breakFlag))
                {
                    txtResult.Invoke(report, new object[] { "当前已包含 " + breakFlag + "\n" });
                    return;
                }

                txtResult.Invoke(report, new object[] { "处理完成行第[" + i + "]条列表(暂停" + randKey + "毫秒),url:" + url + "\n" });
            }
        }

        /// <summary>
        /// Returns the text of the first element matching <paramref name="by"/>, retrying once
        /// after the configured pause when the element is missing or still empty.
        /// Uses FindElements so that a missing element yields null instead of throwing.
        /// </summary>
        /// <returns>The element text, or null/empty when nothing usable was found.</returns>
        private string ReadElementText(ISearchContext context, By by)
        {
            var text = TryReadText(context, by);
            if (string.IsNullOrEmpty(text))
            {
                Thread.Sleep(_stopMSec);
                text = TryReadText(context, by);
            }
            return text;
        }

        /// <summary>Single lookup without retry; null when no element matches.</summary>
        private static string TryReadText(ISearchContext context, By by)
        {
            var element = context.FindElements(by).FirstOrDefault();
            return element == null ? null : element.Text;
        }

        /// <summary>
        /// Loads the detail page of every saved entry that has no content yet, stores the
        /// extracted text and writes the result back to the workbook.
        /// </summary>
        /// <param name="driver">Browser used to load the pages.</param>
        /// <param name="AsyncUIDelegate">Delegate that appends a line to the result box.</param>
        /// <param name="AsynclblResultAsy">Delegate that sets the status label text.</param>
        /// <returns>True only when every detail page was scraped without error.</returns>
        private bool DoWorkGetDetail(PhantomJSDriver driver, Action<String> AsyncUIDelegate, Action<String> AsynclblResultAsy)
        {
            bool blret = true;
            var ExistDatalists = GetExistData();
            try
            {
                var items = ExistDatalists.Where(t => string.IsNullOrEmpty(t.HtmlContent)).ToArray();
                lblResult.Invoke(AsynclblResultAsy, new object[] { string.Format("开始抓取详情页面,总共{0}条数据...", items.Length) });

                var contentSelector = By.CssSelector("div[id='zoom'] > p");
                for (var i = 0; i < items.Length; i++)
                {
                    driver.Navigate().GoToUrl(items[i].Href);

                    // Random pause so the requests look less mechanical.
                    int randKey = _random.Next(MinPauseMilliseconds, _stopMSec);
                    Thread.Sleep(randKey);

                    var contentText = ReadElementText(driver, contentSelector);
                    if (!string.IsNullOrEmpty(contentText))
                    {
                        items[i].HtmlContent = contentText;
                        items[i].IsSucess = "true";
                    }
                    else
                    {
                        // Keep going with the other entries, but report the run as incomplete
                        // so the user knows to start another run.
                        items[i].IsSucess = "false";
                        blret = false;
                    }

                    txtResult.Invoke(AsyncUIDelegate, new object[] { string.Format("处理第[{0}]条(暂停{1}毫秒),日期[{2}] \n", i + 1, randKey, items[i].Identifier) });
                }
            }
            catch (Exception ex)
            {
                blret = false;
                txtResult.Invoke(AsyncUIDelegate, new object[] { "详情数据抓取异常" + ex + "\n" });
            }

            // Persist whatever was collected, including partial results after a failure.
            SaveDataToExcel(ExistDatalists);
            txtResult.Invoke(AsyncUIDelegate, new object[] { "详情数据抓取完成!\n" });
            txtResult.Invoke(AsyncUIDelegate, new object[] { "全部数据抓取完成!\n" });
            return blret;
        }

        /// <summary>
        /// Creates the PhantomJS driver service. Proxy settings can be enabled here.
        /// </summary>
        /// <returns>The default driver service.</returns>
        private static PhantomJSDriverService GetPhantomJSDriverService()
        {
            PhantomJSDriverService pds = PhantomJSDriverService.CreateDefaultService();
            // Proxy server address:
            //pds.Proxy = $"{ip}:{port}";
            // Proxy server credentials:
            //pds.ProxyAuthentication = GetProxyAuthorization();
            return pds;
        }

        /// <summary>
        /// Replaces the cache workbook with the given entries (one row per entry).
        /// </summary>
        /// <param name="list">Entries to write, in output order.</param>
        /// <returns>Always true.</returns>
        private bool SaveDataToExcel(IEnumerable<ChinaBankRateListItem> list)
        {
            var dt = new DataTable();
            dt.Columns.Add("Identifier");
            dt.Columns.Add("PIdentifier");
            dt.Columns.Add("Title");
            dt.Columns.Add("IsSucess");
            dt.Columns.Add("Href");
            dt.Columns.Add("HtmlContent");

            foreach (var item in list)
            {
                DataRow dr = dt.NewRow();
                dr["Identifier"] = item.Identifier;
                dr["PIdentifier"] = item.PIdentifier;
                dr["Title"] = item.Title;
                dr["IsSucess"] = item.IsSucess;
                dr["Href"] = item.Href;
                dr["HtmlContent"] = item.HtmlContent;
                dt.Rows.Add(dr);
            }

            var path = GetExcelPath();
            File.Delete(path);
            ExcelHelper.SaveExcelToFile(path, dt);
            return true;
        }

        /// <summary>Full path of a workbook in the application's base directory.</summary>
        /// <param name="name">File name without extension.</param>
        private string GetExcelPath(string name = "data")
        {
            return Path.Combine(AppDomain.CurrentDomain.BaseDirectory, name + ".xlsx");
        }

        /// <summary>
        /// Reads the entries saved by earlier runs; returns an empty list when no workbook exists.
        /// </summary>
        private List<ChinaBankRateListItem> GetExistData()
        {
            var path = GetExcelPath();
            var list = new List<ChinaBankRateListItem>();
            if (!File.Exists(path))
            {
                return list;
            }

            var dt = ExcelHelper.ReadExcelFile(path, 0);
            foreach (DataRow row in dt.Rows)
            {
                list.Add(new ChinaBankRateListItem()
                {
                    Href = row["Href"].ToString().Trim(),
                    HtmlContent = row["HtmlContent"].ToString().Trim(),
                    Identifier = row["Identifier"].ToString().Trim(),
                    Title = row["Title"].ToString().Trim(),
                    IsSucess = row["IsSucess"].ToString().Trim(),
                    PIdentifier = row["PIdentifier"].ToString().Trim(),
                });
            }
            return list;
        }

        /// <summary>
        /// Exports, for every month in the selected range, the rates of that month's latest
        /// announcement to a new workbook and opens it. Screenshots of the source pages are
        /// captured in the background.
        /// </summary>
        /// <param name="sender">Event source.</param>
        /// <param name="e">Event data.</param>
        private void btnExportData_Click(object sender, EventArgs e)
        {
            btnExportData.Enabled = false;
            btnExportData.Text = "数据导出中...";
            try
            {
                var list = GetExistData();
                var beginDate = new DateTime(dtBeginDate.Value.Year, dtBeginDate.Value.Month, 1);
                // Last day of the selected end month.
                var endDate = new DateTime(dtEndDate.Value.Year, dtEndDate.Value.Month, 1).AddMonths(1).AddDays(-1);

                var export = list
                    .Where(t => beginDate <= Convert.ToDateTime(t.Identifier) && Convert.ToDateTime(t.Identifier) <= endDate)
                    .OrderByDescending(t => Convert.ToDateTime(t.Identifier));

                // Newest entry of each month. Materialized because the screenshot thread
                // enumerates the same sequence concurrently.
                monthList = export
                    .GroupBy(t => Convert.ToDateTime(t.Identifier).ToString("yyyy-MM"))
                    .Select(t => t.First())
                    .ToList();

                // Capture screenshots in the background.
                Thread t1 = new Thread(new ThreadStart(ScreenCapture));
                t1.IsBackground = true;
                t1.Start();

                var allExportData = new List<ChinaBankRateExport>();
                foreach (var item in monthList)
                {
                    allExportData.AddRange(GetRateFromHtmlContent(Convert.ToDateTime(item.Identifier), item.HtmlContent));
                }

                var dt = new DataTable();
                dt.Columns.Add("起兑币种");
                dt.Columns.Add("兑换币种");
                dt.Columns.Add("汇率");
                dt.Columns.Add("状态");
                dt.Columns.Add("生效日期");
                dt.Columns.Add("备注");
                foreach (var item in allExportData)
                {
                    DataRow dr = dt.NewRow();
                    var index = 0;
                    dr[index++] = item.From;
                    dr[index++] = item.To;
                    dr[index++] = item.Rate;
                    dr[index++] = item.Status;
                    dr[index++] = item.EffectiveDate;
                    dr[index++] = item.Des;
                    dt.Rows.Add(dr);
                }

                string exportDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Export");
                Directory.CreateDirectory(exportDir);
                var fullPath = Path.Combine(exportDir, "From" + beginDate.ToString("yyyyMM") + "To" + endDate.ToString("yyyyMM") + "_" + DateTime.Now.ToString("yyyyMMddHHmmss") + ".xlsx");
                ExcelHelper.SaveExcelToFile(fullPath, dt);
                System.Diagnostics.Process.Start(fullPath);

                lblExportResult.Text = "导出完成!" + DateTime.Now.ToShortDateString();
            }
            finally
            {
                // Restore the button even when the export fails, so the user can retry.
                btnExportData.Enabled = true;
                btnExportData.Text = "导出指定日期数据";
            }
        }

        /// <summary>
        /// Extracts one rate per known currency from an announcement's text. All rates are
        /// expressed as "1 unit of foreign currency in CNY".
        /// </summary>
        /// <param name="dt">Effective date to stamp on every extracted rate.</param>
        /// <param name="htmlContent">Announcement text; null or empty yields an empty list.</param>
        /// <returns>The extracted rates, in the order of <see cref="RateDic.RateNameDic"/>.</returns>
        private List<ChinaBankRateExport> GetRateFromHtmlContent(DateTime dt, string htmlContent)
        {
            var exportRate = new List<ChinaBankRateExport>();
            if (string.IsNullOrEmpty(htmlContent))
            {
                return exportRate;
            }

            // Segments quoted as "1 CNY = x foreign" must be inverted; segments quoted as
            // "foreign = x CNY" are used unchanged.
            var CNYtoFlags = new string[] { "人民币1元对" };
            var arrHtml = htmlContent.Split(new char[] { ',', ',' }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var item in RateDic.RateNameDic)
            {
                foreach (var html in arrHtml)
                {
                    if (!html.Contains(item.Key))
                    {
                        continue;
                    }

                    var match = RateRegex.Match(html);
                    if (!match.Success)
                    {
                        continue;
                    }

                    // The announcement always uses '.' as decimal separator, whatever the OS culture is.
                    decimal rate = decimal.Parse(match.Value, System.Globalization.CultureInfo.InvariantCulture);

                    if (CNYtoFlags.Any(t => html.Contains(t)))
                    {
                        if (rate == 0M)
                        {
                            // A zero quote cannot be inverted; try the next segment instead of throwing.
                            continue;
                        }
                        rate = MathHelper.Round6P(1 / rate);
                    }

                    // Yen is quoted per 100 units.
                    if (html.Contains("100日元"))
                    {
                        rate = MathHelper.Round6P(rate / 100);
                    }

                    exportRate.Add(new ChinaBankRateExport()
                    {
                        Des = item.Key,
                        EffectiveDate = dt,
                        From = item.Value,
                        To = "CNY",
                        Rate = rate,
                        Status = "有效",
                    });
                    break;
                }
            }
            return exportRate;
        }

        /// <summary>
        /// Background-thread entry point: renders a PNG of each exported announcement with
        /// phantomjs.exe + rasterize.js (skipping files that already exist), then shows the
        /// "show images" button. Screenshots are best effort; failures are traced, not raised.
        /// </summary>
        private void ScreenCapture()
        {
            Action AsynclbtnShowImg = delegate { btnShowImg.Visible = true; };

            string captureDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Export", "Capture");
            Directory.CreateDirectory(captureDir);

            // Work on the reference read at start so a later export cannot swap it mid-loop.
            var items = monthList;
            if (items != null)
            {
                foreach (var item in items)
                {
                    try
                    {
                        var fullPath = Path.Combine(captureDir, item.Identifier + ".png");
                        if (File.Exists(fullPath))
                        {
                            continue;
                        }

                        using (var p = new System.Diagnostics.Process())
                        {
                            p.StartInfo.WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden;
                            p.StartInfo.FileName = "phantomjs.exe";
                            p.StartInfo.WorkingDirectory = AppDomain.CurrentDomain.BaseDirectory;
                            // Quote URL and path so spaces or '&' cannot split the arguments.
                            p.StartInfo.Arguments = "rasterize.js \"" + item.Href + "\" \"" + fullPath + "\"";
                            p.Start();
                            p.WaitForExit(5000);
                        }
                    }
                    catch (Exception ex)
                    {
                        System.Diagnostics.Trace.TraceError("Screenshot capture failed for " + item.Identifier + ": " + ex);
                    }
                }
            }

            lblExportResult.Invoke(AsynclbtnShowImg);
        }

        /// <summary>Opens the screenshot folder in the shell.</summary>
        private void btnShowImg_Click(object sender, EventArgs e)
        {
            string captureDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Export", "Capture");
            System.Diagnostics.Process.Start(captureDir);
        }
    }

    #region Entities

    /// <summary>One announcement from the list page, as cached in the workbook.</summary>
    public class ChinaBankRateListItem
    {
        /// <summary>Announcement date taken from the link text.</summary>
        public string Identifier { set; get; }

        /// <summary>Parent identifier: number of the list page the entry was found on.</summary>
        public string PIdentifier { set; get; }

        public string Title { set; get; }

        /// <summary>"true" or "false": whether the detail page was scraped successfully.</summary>
        public string IsSucess { set; get; }

        public string Href { set; get; }

        /// <summary>Text of the detail page; empty until it has been scraped.</summary>
        public string HtmlContent { set; get; }
    }

    /*
     Currency code reference:
     HKD Hong Kong dollar    IDR Indonesian rupiah   INR Indian rupee
     USD US dollar           EUR Euro                GBP Pound sterling
     TWD New Taiwan dollar   CAD Canadian dollar     MXN Mexican peso
     AUD Australian dollar   BRL Brazilian real      KRW South Korean won
     MYR Malaysian ringgit   JPY Japanese yen        ZAR South African rand
     THB Thai baht           CHF Swiss franc         SGD Singapore dollar
     NZD New Zealand dollar  PHP Philippine peso     MOP Macanese pataca
     CNY Chinese yuan        RUB Russian ruble
    */

    /// <summary>
    /// Maps the Chinese currency names that appear in the announcements to ISO currency codes.
    /// Matching was originally done purely with regular expressions, but special characters
    /// and whitespace caused problems, so segments are now found by splitting the text and
    /// checking whether they contain these names.
    /// </summary>
    public static class RateDic
    {
        public static Dictionary<string, string> RateNameDic { set; get; }

        static RateDic()
        {
            // Insertion order determines the row order of the export.
            RateNameDic = new Dictionary<string, string>
            {
                { "美元", "USD" },
                { "印度卢比", "INR" },
                { "欧元", "EUR" },
                { "日元", "JPY" },
                { "港元", "HKD" },
                { "英镑", "GBP" },
                { "澳大利亚元", "AUD" },
                { "新西兰元", "NZD" },
                { "新加坡元", "SGD" },
                { "瑞士法郎", "CHF" },
                { "加拿大元", "CAD" },
                { "俄罗斯卢布", "RUB" },
                { "林吉特", "MYR" },
                { "南非兰特", "ZAR" },
                { "韩元", "KRW" },
                // Currencies that are published but intentionally not exported:
                //{ "阿联酋迪拉姆", "AED" },
                //{ "沙特里亚尔", "SAR" },
                //{ "匈牙利福林", "HUF" },
                //{ "波兰兹罗提", "PLN" },
                //{ "丹麦克朗", "DKK" },
                //{ "瑞典克朗", "SEK" },
                //{ "挪威克朗", "NOK" },
                //{ "土耳其里拉", "TRY" },
                { "墨西哥比索", "MXN" },
            };
        }
    }

    /// <summary>One row of the exported rate sheet.</summary>
    public class ChinaBankRateExport
    {
        public string From { set; get; }
        public string To { set; get; }
        public decimal Rate { set; get; }
        public DateTime EffectiveDate { set; get; }
        public string Status { set; get; }
        public string Des { set; get; }
    }

    #endregion
}
1.这个网站不是直接通过ajax请求数据,如果是这样通过等待就可以抓取到数据,该网站先是通过返回的js生成cookie,然后带上cookie访问动态地址,然后再生成cookie,带上所有的cookie,再去访问302,最后得到结果。具体看参考http://www.jianshu.com/p/11fac0596020
2.参考抓取获取cookies https://www.cnblogs.com/songxingzhu/p/7110723.html
3.获取里面的js变量 http://michaelthelin.se/javascript/testing/webdriver/2013/02/14/webdriver-reading-the-value-of-a-javascript-variable-spoiler-weirdness.html
参考2:
1.这个网站处理办法如下:1、进入搜索页面,得到js
2、htmlfile.write反混淆js,得到类似的两个函数function KTKY2RBD9NHPBCIHV9ZMEQQDARSLVFDU(str)和function QWERTASDFGXYSF()
3、运行这两个函数,得到两个cookie
cookieString = "wzwstemplate=" + KTKY2RBD9NHPBCIHV9ZMEQQDARSLVFDU(template.toString()) + "; path=/";
var confirm = QWERTASDFGXYSF();
cookieString = "wzwschallenge=" + KTKY2RBD9NHPBCIHV9ZMEQQDARSLVFDU(confirm.toString()) + "; path=/";
4、根据dynamicurl中的地址,带着三个Cookie: wzwsconfirm= wzwstemplate= wzwschallenge=
得到Cookie ccpassport=,和302跳转
5、带着4个cookie,经过两次302跳转,就可以进入search页面,获得JSESSIONID,后面就好办了
这个网站比较复杂。
参考3:
1.http://www.cnblogs.com/endlock/p/6423613.html 使用Selenium来操作PhantomJS绝配
2.收费:https://www.nrecosite.com/phantomjs_wrapper_net.aspx
3.抓取中行 http://xusheng.org/blog/2016/10/19/ru-he-zhua-qu-diao-cha-tong-ji-si-de-shu-ju/
4.Webdriver: Reading the value of a Javascript
variable (spoiler: weirdness):
http://michaelthelin.se/javascript/testing/webdriver/2013/02/14/webdriver-reading-the-value-of-a-javascript-variable-spoiler-weirdness.html
5. Python小记:selenium+PhantomJS爬虫解决页面js添加cookie
: https://www.jianshu.com/p/11fac0596020
相关文章推荐
- 在java程序中使用jQuery抓取网页的新方法(java调用js解析引擎)
- C#网页解析获得HTML中JS变量,一个是浏览器交互(EvaluateScriptAsync),一个是HtmlAgilityPack解析
- 利用Python抓取和解析网页(转载) HTMLParser 和 urllib
- 用Python抓取网页并解析
- Node.js学习之网络爬虫(使用cheerio抓取网页数据)
- 菜鸟练习C#htmlparser----C#正则加htmlDOM进行网页解析腾讯新闻帖子列表相关信息提取
- C# 抓取网页里面的所有链接!
- C#实现通过程序自动抓取远程Web网页信息的代码
- Node.js实现的简易网页抓取功能示例
- 详解抓取网站,模拟登陆,抓取动态网页的原理和实现(Python,C#等)
- python--爬虫入门(八)体验HTMLParser解析网页,网页抓取解析整合练习
- c# 抓取Web网页数据分析
- Python实现抓取网页并且解析的实例
- [C#][固定格式网页解析]使用正则表达式处理网页的初步体会
- C# 使用 Abot 实现 爬虫 抓取网页信息 源码下载
- scrapy:python下的网页抓取及解析框架
- 网页爬虫抓取js动态渲染数据
- C# 抓取网页内容
- C# 解析网页的利器
- 使用python抓取js动态加载的网页