.NET2.0抓取网页全部链接
2008-05-01 04:37
459 查看
效果图
后台代码
using System;
using System.Collections;
using System.Configuration;
using System.Data;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
/// <summary>
/// Code-behind for Default.aspx. Downloads the page whose address is typed into
/// TextBox1 and writes every distinct absolute link found in it to TextBox2,
/// one link per line.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    // Absolute http/https URL: scheme, dotted host name, optional path and query.
    // Built once and shared by all requests instead of being re-parsed per click.
    private static readonly Regex LinkPattern = new Regex(
        @"https?://([\w-]+\.)+[\w-]+(/[\w\-./?%&=]*)?",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>Page load handler; the page needs no initialisation.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Click handler for Button1: validates the address in TextBox1, downloads
    /// that page and lists its distinct links in TextBox2.
    /// </summary>
    /// <param name="sender">The control that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void Button1_Click(object sender, EventArgs e)
    {
        TextBox2.Text = "";

        Uri pageUri;
        if (!TryParseHttpUrl(TextBox1.Text, out pageUri))
        {
            TextBox2.Text = "请输入有效的 http:// 或 https:// 地址。";
            return;
        }

        string html;
        try
        {
            html = DownloadPage(pageUri);
        }
        catch (WebException ex)
        {
            // Report the failure in the result box instead of an unhandled error page.
            TextBox2.Text = "抓取失败: " + ex.Message;
            return;
        }

        TextBox2.Text = ExtractLinks(html);
    }

    /// <summary>
    /// Parses user input as an absolute http or https URL.
    /// </summary>
    /// <param name="text">Raw text entered by the user; may be null or empty.</param>
    /// <param name="uri">The parsed address on success; otherwise null.</param>
    /// <returns>true when <paramref name="text"/> is an absolute http/https URL.</returns>
    private static bool TryParseHttpUrl(string text, out Uri uri)
    {
        uri = null;
        if (text == null)
        {
            return false;
        }

        Uri parsed;
        if (!Uri.TryCreate(text.Trim(), UriKind.Absolute, out parsed))
        {
            return false;
        }

        // Other schemes (file:, ftp:, ...) do not produce an HttpWebRequest and
        // must not be fetched on behalf of a web user.
        if (parsed.Scheme != Uri.UriSchemeHttp && parsed.Scheme != Uri.UriSchemeHttps)
        {
            return false;
        }

        uri = parsed;
        return true;
    }

    /// <summary>
    /// Downloads the body of <paramref name="pageUri"/> as text.
    /// </summary>
    /// <param name="pageUri">Absolute http/https address to fetch.</param>
    /// <returns>The response body decoded by StreamReader's default encoding.</returns>
    /// <exception cref="WebException">The request failed.</exception>
    private static string DownloadPage(Uri pageUri)
    {
        // NOTE(review): the server fetches whatever host the user names, so
        // addresses on the internal network remain reachable (SSRF). Restrict
        // the allowed hosts if this page is exposed to untrusted users.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUri);

        // Dispose both the response and the reader even when reading throws.
        using (WebResponse response = request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Collects the distinct links in <paramref name="html"/>, in order of first
    /// appearance.
    /// </summary>
    /// <param name="html">Page source to scan.</param>
    /// <returns>The links, each followed by "\n"; empty when none are found.</returns>
    private static string ExtractLinks(string html)
    {
        Hashtable seen = new Hashtable();
        StringBuilder links = new StringBuilder();

        foreach (Match match in LinkPattern.Matches(html))
        {
            string link = match.Value;
            if (seen.ContainsKey(link))
            {
                continue; // already listed
            }

            // Remember the link so later repeats are filtered out.
            seen.Add(link, null);
            links.Append(link).Append("\n");
        }

        return links.ToString();
    }
}
前台
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %>
<%-- Front end of the link extractor: an address box, a button wired to Button1_Click, and a multi-line result box. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title>抓取网页所有链接</title>
</head>
<body>
    <form id="form1" runat="server">
        <div>
            <%-- TextBox1: address of the page to scan; Button1 raises Button1_Click in the code-behind. --%>
            <asp:TextBox ID="TextBox1" runat="server" Width="481px"></asp:TextBox>
            <asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="提取" />
            <br />
            <%-- TextBox2: multi-line box that the code-behind fills with the extracted links. --%>
            <asp:TextBox ID="TextBox2" runat="server" Height="304px" TextMode="MultiLine" Width="524px"></asp:TextBox>
        </div>
    </form>
</body>
</html>
后台代码
using System;
using System.Collections;
using System.Configuration;
using System.Data;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
/// <summary>
/// Code-behind for Default.aspx. Downloads the page whose address is typed into
/// TextBox1 and writes every distinct absolute link found in it to TextBox2,
/// one link per line.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    // Absolute http/https URL: scheme, dotted host name, optional path and query.
    // Built once and shared by all requests instead of being re-parsed per click.
    private static readonly Regex LinkPattern = new Regex(
        @"https?://([\w-]+\.)+[\w-]+(/[\w\-./?%&=]*)?",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>Page load handler; the page needs no initialisation.</summary>
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Click handler for Button1: validates the address in TextBox1, downloads
    /// that page and lists its distinct links in TextBox2.
    /// </summary>
    /// <param name="sender">The control that raised the event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void Button1_Click(object sender, EventArgs e)
    {
        TextBox2.Text = "";

        Uri pageUri;
        if (!TryParseHttpUrl(TextBox1.Text, out pageUri))
        {
            TextBox2.Text = "请输入有效的 http:// 或 https:// 地址。";
            return;
        }

        string html;
        try
        {
            html = DownloadPage(pageUri);
        }
        catch (WebException ex)
        {
            // Report the failure in the result box instead of an unhandled error page.
            TextBox2.Text = "抓取失败: " + ex.Message;
            return;
        }

        TextBox2.Text = ExtractLinks(html);
    }

    /// <summary>
    /// Parses user input as an absolute http or https URL.
    /// </summary>
    /// <param name="text">Raw text entered by the user; may be null or empty.</param>
    /// <param name="uri">The parsed address on success; otherwise null.</param>
    /// <returns>true when <paramref name="text"/> is an absolute http/https URL.</returns>
    private static bool TryParseHttpUrl(string text, out Uri uri)
    {
        uri = null;
        if (text == null)
        {
            return false;
        }

        Uri parsed;
        if (!Uri.TryCreate(text.Trim(), UriKind.Absolute, out parsed))
        {
            return false;
        }

        // Other schemes (file:, ftp:, ...) do not produce an HttpWebRequest and
        // must not be fetched on behalf of a web user.
        if (parsed.Scheme != Uri.UriSchemeHttp && parsed.Scheme != Uri.UriSchemeHttps)
        {
            return false;
        }

        uri = parsed;
        return true;
    }

    /// <summary>
    /// Downloads the body of <paramref name="pageUri"/> as text.
    /// </summary>
    /// <param name="pageUri">Absolute http/https address to fetch.</param>
    /// <returns>The response body decoded by StreamReader's default encoding.</returns>
    /// <exception cref="WebException">The request failed.</exception>
    private static string DownloadPage(Uri pageUri)
    {
        // NOTE(review): the server fetches whatever host the user names, so
        // addresses on the internal network remain reachable (SSRF). Restrict
        // the allowed hosts if this page is exposed to untrusted users.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUri);

        // Dispose both the response and the reader even when reading throws.
        using (WebResponse response = request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Collects the distinct links in <paramref name="html"/>, in order of first
    /// appearance.
    /// </summary>
    /// <param name="html">Page source to scan.</param>
    /// <returns>The links, each followed by "\n"; empty when none are found.</returns>
    private static string ExtractLinks(string html)
    {
        Hashtable seen = new Hashtable();
        StringBuilder links = new StringBuilder();

        foreach (Match match in LinkPattern.Matches(html))
        {
            string link = match.Value;
            if (seen.ContainsKey(link))
            {
                continue; // already listed
            }

            // Remember the link so later repeats are filtered out.
            seen.Add(link, null);
            links.Append(link).Append("\n");
        }

        return links.ToString();
    }
}
前台
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %>
<%-- Front end of the link extractor: an address box, a button wired to Button1_Click, and a multi-line result box. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title>抓取网页所有链接</title>
</head>
<body>
    <form id="form1" runat="server">
        <div>
            <%-- TextBox1: address of the page to scan; Button1 raises Button1_Click in the code-behind. --%>
            <asp:TextBox ID="TextBox1" runat="server" Width="481px"></asp:TextBox>
            <asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="提取" />
            <br />
            <%-- TextBox2: multi-line box that the code-behind fills with the extracted links. --%>
            <asp:TextBox ID="TextBox2" runat="server" Height="304px" TextMode="MultiLine" Width="524px"></asp:TextBox>
        </div>
    </form>
</body>
</html>
相关文章推荐
- .NET2.0抓取网页全部链接【月儿原创】
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接 (转)
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接
- .NET2.0抓取网页全部链接【月儿原创】
- .NET2.0抓取网页全部链接(转)
- .NET2.0抓取网页全部链接
- NET2.0抓取网页全部链接
- NET2.0抓取网页全部链接