您的位置:首页 > 其它

.NET RSS新闻自动采集

2008-05-14 08:58 337 查看
创建应用程序

/// <summary>
/// Application start-up hook: configures the RSS collector through the static
/// fields of <c>AppLine</c> and then creates one instance of it.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
void Application_Start(object sender, EventArgs e)
{
    // Physical folder that the "~/Get_New/" virtual path maps to.
    AppLine.NewPath = Server.MapPath("~/Get_New/");

    // Feed handed to the collector.
    AppLine.RssUrl = @"http://news.163.com/special/00011K6L/rss_newstop.xml";

    // The instance itself is not used afterwards; it is created only for
    // whatever work the AppLine constructor performs.
    new AppLine();
}

AppLine 类

[align=left]using System;[/align]
[align=left]using System.Data;[/align]
[align=left]using System.Configuration;[/align]
[align=left]using System.Web;[/align]
[align=left]using System.Web.Security;[/align]
[align=left]using System.Web.UI;[/align]
[align=left]using System.Web.UI.WebControls;[/align]
[align=left]using System.Web.UI.WebControls.WebParts;[/align]
[align=left]using System.Web.UI.HtmlControls;[/align]
[align=left]using System.IO;[/align]
[align=left]using System.Net;[/align]
[align=left]using System.Text;[/align]
[align=left]using System.Threading;[/align]
[align=left]using System.Text.RegularExpressions;[/align]
[align=left] [/align]
[align=left] [/align]
/// <summary>
/// Background RSS collector. Constructing an instance starts a worker thread
/// that, once per polling interval, downloads the feed at <see cref="RssUrl"/>,
/// fetches every item whose link contains "news", extracts the headline and
/// article body from the page, and saves the result as an HTML file under
/// <see cref="NewPath"/> in a folder named after the current date (yyyyMMdd).
/// </summary>
public class AppLine : Page
{
    /// <summary>Set to true to make the polling loop exit at its next check.</summary>
    public static volatile bool bStop;

    /// <summary>The feed data set of the most recent polling pass.</summary>
    public static DataSet ds;

    /// <summary>URL of the RSS feed to poll. Must be set before constructing an instance.</summary>
    public static string RssUrl;

    /// <summary>Text of the most recently processed page (raw, then trimmed once extraction succeeds).</summary>
    public static string htmltext;

    /// <summary>Root folder the article files are written to. Must be set before constructing an instance.</summary>
    public static string NewPath;

    /// <summary>Pause between two polling passes: one hour.</summary>
    private const int PollIntervalMilliseconds = 3600000;

    /// <summary>Marker that opens the article digest in the downloaded page.</summary>
    private const string DigestStartMarker = "<span id=\"digest\">";

    /// <summary>Marker (the pager comment) that ends the article body in the downloaded page.</summary>
    private const string BodyEndMarker = "<!-- 分页 -->";

    // The page is read and written with the same encoding, so the saved copy
    // does not depend on the machine's default ANSI code page.
    private static readonly Encoding PageEncoding = Encoding.GetEncoding("GB2312");

    private static readonly Regex HeadlineRegex =
        new Regex("(<h1 id=\"endTitle\">.*</h1>)", RegexOptions.IgnoreCase);

    // Lazy quantifier: each link is removed on its own, so text that sits
    // between two links on the same line is kept.
    private static readonly Regex AnchorRegex =
        new Regex("(<a href.*?</a>)", RegexOptions.IgnoreCase);

    /// <summary>
    /// Starts the polling thread. <see cref="RssUrl"/> and <see cref="NewPath"/>
    /// are read by that thread, so assign them first.
    /// </summary>
    public AppLine()
    {
        Thread worker = new Thread(new ThreadStart(ThreadProc));
        // A background thread does not keep the process alive on shutdown.
        worker.IsBackground = true;
        worker.Start();
    }

    /// <summary>
    /// Polling loop: runs one collection pass, sleeps for the polling interval,
    /// and repeats until <see cref="bStop"/> is set. A failed pass is logged
    /// and retried on the next cycle instead of ending the thread.
    /// </summary>
    public void ThreadProc()
    {
        while (!bStop)
        {
            try
            {
                CollectOnce();
            }
            catch (Exception ex)
            {
                // An exception escaping a worker thread terminates the process,
                // and a transient feed failure should not stop collection.
                System.Diagnostics.Trace.TraceError("AppLine: feed pass failed: " + ex);
            }

            Thread.Sleep(PollIntervalMilliseconds);
        }
    }

    /// <summary>
    /// Downloads the feed once and saves every matching item.
    /// </summary>
    private void CollectOnce()
    {
        DataSet feed = new DataSet();
        ds = feed;
        try
        {
            feed.ReadXml(RssUrl);

            DataTable items = feed.Tables["item"];
            if (items == null)
            {
                // The feed parsed but contains no <item> elements.
                return;
            }

            foreach (DataRow row in items.Rows)
            {
                if (bStop)
                {
                    return;
                }

                string link = row["link"].ToString();
                if (link.IndexOf("news", StringComparison.Ordinal) == -1)
                {
                    continue;
                }

                try
                {
                    Get_New_163(link, row["title"].ToString());
                }
                catch (Exception ex)
                {
                    // One broken article must not abort the rest of the pass.
                    System.Diagnostics.Trace.TraceError(
                        "AppLine: could not save '" + link + "': " + ex);
                }
            }
        }
        finally
        {
            feed.Reset();
        }
    }

    /// <summary>
    /// Downloads one article page and, when both content markers are found,
    /// saves the headline plus the text between the markers (links removed)
    /// as "&lt;NewPath&gt;/&lt;yyyyMMdd&gt;/&lt;title&gt;.html". Does nothing
    /// when that file already exists.
    /// </summary>
    /// <param name="this_url">Address of the article page.</param>
    /// <param name="title">Feed title of the article; used as the file name.</param>
    private void Get_New_163(string this_url, string title)
    {
        string dayFolder = Path.Combine(
            NewPath,
            DateTime.Now.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture));
        string targetFile = Path.Combine(dayFolder, SanitizeFileName(title) + ".html");

        if (File.Exists(targetFile))
        {
            return;
        }

        string page;
        WebRequest request = WebRequest.Create(this_url);
        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, PageEncoding))
        {
            page = reader.ReadToEnd();
        }
        htmltext = page;

        int start = page.IndexOf(DigestStartMarker, StringComparison.Ordinal);
        int end = page.IndexOf(BodyEndMarker, StringComparison.Ordinal);
        if (start == -1 || end <= start)
        {
            // A marker is missing, or they are out of order: nothing to extract.
            return;
        }

        // When several headline matches exist, the last one wins.
        string headline = "";
        foreach (Match match in HeadlineRegex.Matches(page))
        {
            headline = match.Value;
        }

        string article = AnchorRegex.Replace(headline + page.Substring(start, end - start), "");
        htmltext = article;

        // No-op when the folder already exists.
        Directory.CreateDirectory(dayFolder);
        using (StreamWriter writer = new StreamWriter(targetFile, false, PageEncoding))
        {
            writer.Write(article);
        }
    }

    /// <summary>
    /// Removes every character that is not allowed in a file name.
    /// </summary>
    /// <param name="title">Raw article title.</param>
    /// <returns>The title without invalid file-name characters.</returns>
    private static string SanitizeFileName(string title)
    {
        char[] invalid = Path.GetInvalidFileNameChars();
        StringBuilder clean = new StringBuilder(title.Length);
        foreach (char c in title)
        {
            if (Array.IndexOf(invalid, c) < 0)
            {
                clean.Append(c);
            }
        }
        return clean.ToString();
    }
}
实例下载地址:http://d.download.csdn.net/down/454200/sunchaohuang
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: