远程桌面强制踢用户下线
2015-06-10 14:43
357 查看
网络上有很多文章介绍如何自动抓取网站的Alexa排名,但是仔细一看发现抓取到的数据(Alexa排名
数值)被Alexa加入了很多干扰元素,如果只是要将数据(Alexa排名数值)显示在页面倒没有什么问题
,若是要对数据进行处理比如将两个网站的排名数值进行比较或者将网站排名数值存入数据库供日后调
用,则要对抓取到的数据进行适当的处理。
以下是本人结合其他网友提供的代码,对抓取到的数据进行处理后获得干干净净的数值的方法。核心
函数代码如下:
<%
'// alexa 世界排名的查询页面为:http://www.alexa.com/data/details/traffic_details?q=&Url=
'// 以下函数抓取到含有干扰元素的数据并通过函数对数据进行处理,获得干干净净的Alexa排名数值
Function alexa(str)
url="http://www.alexa.com/data/details/traffic_details?q=&url="&str
strs=str
If IsObjInstalled("AspHTTP.Conn")=true Then
str= getaspHTTPPage(url)
else
str= getHTTPPage(url)
End if
if str="" then
Call Error()
else
str_=str
str1=""
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top="<!--Did you know"
str_bottom="</span>"
reg.Pattern=""&str_top&"((.|\n)*?)"&str_bottom&""
Set matches = reg.execute(str_)
str1=""
For Each match1 in matches
str1=str1&match1.Value&"***"
Next
Set matches = Nothing
Set reg = Nothing
IF str1 <> "" Then
str1 = Replace(str1,"<!--Did you know? Alexa offers this data programmatically.
Visit http://webservices.amazon.com/ for more information about the Alexa Web Information
Service.-->","")
str1 = Replace(str1,"</span>","")
Str_11=split(str1,"<div class=""borderBottom""></div>")
str1 = Str_11(0)
Str_11 = split(str1,"***")
str1_Pan = Str_11(0)
End If
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top="<td class=""traffic"">"
str_bottom="</td>"
reg.Pattern=""&str_top&"((.|\n)*?)"&str_bottom&""
Set matches = reg.execute(str_)
str1=""
For Each match1 in matches
str1=str1&match1.Value&"***"
Next
Set matches = Nothing
Set reg = Nothing
IF str1 <> "" Then
Str_11=split(str1,"***")
End If
End if
'************************************
'************************************
alexa=getcorrectvalue(str1_Pan)
'************************************
'************************************
End Function
'************************************
'此功能函数去除干扰元素
'************************************
function getcorrectvalue(source)
source="|"+source+"|"
while InStr(source,"<")>0
thestart = InStr(source, "<")
theend = InStr(source, ">")
source = mid(source,1,thestart-1)+right(source,(len(source)-theend))
wend
source=replace(source,"|","")
source=replace(source,",","")
getcorrectvalue=source
end function
'************************************
'************************************
'// <summary>
'// 采用 Microsoft.XMLHTTP 组件采集数据
'// </summary>
Function getHTTPPage(url)
on error resume next
dim http
set http=Server.createobject("Microsoft.XMLHTTP")
Http.open "GET",url,false
Http.send()
if Http.readystate<>4 then
exit function
end if
getHTTPPage=bytes2BSTR(Http.responseBody)
set http=nothing
if err.number<>0 then err.Clear
End function
'// <summary>
'// 采用 ADODB.Stream 处理采集到的数据,把二进制的文件转成文本字符
'// </summary>
Function Bytes2bStr(vin)
Dim BytesStream,StringReturn
Set BytesStream = Server.CreateObject("ADODB.Stream")
BytesStream.Type = 2
BytesStream.Open
BytesStream.WriteText vin
BytesStream.Position = 0
BytesStream.Charset = "GB2312"
BytesStream.Position = 2
StringReturn =BytesStream.ReadText
BytesStream.close
Set BytesStream = Nothing
Bytes2bStr = StringReturn
End Function
'// <summary>
'// 采用 AspHTTP.Conn 组件采集数据
'// </summary>
Function getaspHTTPPage(url)
if url="" then
exit function
end if
Set HttpObj = Server.CreateObject("AspHTTP.Conn")
'设置代理服务器,通过代理上网的用户需要设置此选项
If ProxyIP=1 Then
HttpObj.Proxy="192.168.5.254:808"
end if
HTTPObj.TimeOut = 45
HttpObj.Url = url
HttpObj.RequestMethod = "GET"
getaspHTTPPage = HttpObj.GetURL
set HttpObj=nothing
End function
'//<summary>
'//检查组件,采用xmlhttp抓取网页还是AspHTTP
'//</summary>
Function IsObjInstalled(strClassString)
On Error Resume Next
IsObjInstalled = False
Err = 0
Dim xTestObj
Set xTestObj = Server.CreateObject(strClassString)
If 0 = Err Then
If AspHttpOpen=1 Then
IsObjInstalled = True
'Response.write "当前组件 ASPHTTP"
Else
IsObjInstalled = False
'Response.write "当前组件 XMLHTTP"
End If
Else
IsObjInstalled = False
'Response.write "当前组件 XMLHTTP"
End If
Set xTestObj = Nothing
Err = 0
End Function
Sub Error()
response.write "<BR> 抓取不到数据-可能是因为网络原因不能访问站点<BR><a
href=javascript:location.reload();>重试</a>"
response.end
End Sub
%>
调用方法:
<%
response.write alexa("http://blog.sina.com.cn/u/1086421675")
%>
<%=alexa("http://blog.sina.com.cn/u/1086421675")%>
数值)被Alexa加入了很多干扰元素,如果只是要将数据(Alexa排名数值)显示在页面倒没有什么问题
,若是要对数据进行处理比如将两个网站的排名数值进行比较或者将网站排名数值存入数据库供日后调
用,则要对抓取到的数据进行适当的处理。
以下是本人结合其他网友提供的代码,对抓取到的数据进行处理后获得干干净净的数值的方法。核心
函数代码如下:
<%
'// alexa 世界排名的查询页面为:http://www.alexa.com/data/details/traffic_details?q=&Url=
'// 以下函数抓取到含有干扰元素的数据并通过函数对数据进行处理,获得干干净净的Alexa排名数值
Function alexa(str)
url="http://www.alexa.com/data/details/traffic_details?q=&url="&str
strs=str
If IsObjInstalled("AspHTTP.Conn")=true Then
str= getaspHTTPPage(url)
else
str= getHTTPPage(url)
End if
if str="" then
Call Error()
else
str_=str
str1=""
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top="<!--Did you know"
str_bottom="</span>"
reg.Pattern=""&str_top&"((.|\n)*?)"&str_bottom&""
Set matches = reg.execute(str_)
str1=""
For Each match1 in matches
str1=str1&match1.Value&"***"
Next
Set matches = Nothing
Set reg = Nothing
IF str1 <> "" Then
str1 = Replace(str1,"<!--Did you know? Alexa offers this data programmatically.
Visit http://webservices.amazon.com/ for more information about the Alexa Web Information
Service.-->","")
str1 = Replace(str1,"</span>","")
Str_11=split(str1,"<div class=""borderBottom""></div>")
str1 = Str_11(0)
Str_11 = split(str1,"***")
str1_Pan = Str_11(0)
End If
set reg=new Regexp
reg.Multiline=True
reg.Global=True
reg.IgnoreCase=true
str_top="<td class=""traffic"">"
str_bottom="</td>"
reg.Pattern=""&str_top&"((.|\n)*?)"&str_bottom&""
Set matches = reg.execute(str_)
str1=""
For Each match1 in matches
str1=str1&match1.Value&"***"
Next
Set matches = Nothing
Set reg = Nothing
IF str1 <> "" Then
Str_11=split(str1,"***")
End If
End if
'************************************
'************************************
alexa=getcorrectvalue(str1_Pan)
'************************************
'************************************
End Function
'************************************
'此功能函数去除干扰元素
'************************************
function getcorrectvalue(source)
source="|"+source+"|"
while InStr(source,"<")>0
thestart = InStr(source, "<")
theend = InStr(source, ">")
source = mid(source,1,thestart-1)+right(source,(len(source)-theend))
wend
source=replace(source,"|","")
source=replace(source,",","")
getcorrectvalue=source
end function
'************************************
'************************************
'// <summary>
'// 采用 Microsoft.XMLHTTP 组件采集数据
'// </summary>
Function getHTTPPage(url)
on error resume next
dim http
set http=Server.createobject("Microsoft.XMLHTTP")
Http.open "GET",url,false
Http.send()
if Http.readystate<>4 then
exit function
end if
getHTTPPage=bytes2BSTR(Http.responseBody)
set http=nothing
if err.number<>0 then err.Clear
End function
'// <summary>
'// 采用 ADODB.Stream 处理采集到的数据,把二进制的文件转成文本字符
'// </summary>
Function Bytes2bStr(vin)
Dim BytesStream,StringReturn
Set BytesStream = Server.CreateObject("ADODB.Stream")
BytesStream.Type = 2
BytesStream.Open
BytesStream.WriteText vin
BytesStream.Position = 0
BytesStream.Charset = "GB2312"
BytesStream.Position = 2
StringReturn =BytesStream.ReadText
BytesStream.close
Set BytesStream = Nothing
Bytes2bStr = StringReturn
End Function
'// <summary>
'// 采用 AspHTTP.Conn 组件采集数据
'// </summary>
Function getaspHTTPPage(url)
if url="" then
exit function
end if
Set HttpObj = Server.CreateObject("AspHTTP.Conn")
'设置代理服务器,通过代理上网的用户需要设置此选项
If ProxyIP=1 Then
HttpObj.Proxy="192.168.5.254:808"
end if
HTTPObj.TimeOut = 45
HttpObj.Url = url
HttpObj.RequestMethod = "GET"
getaspHTTPPage = HttpObj.GetURL
set HttpObj=nothing
End function
'//<summary>
'//检查组件,采用xmlhttp抓取网页还是AspHTTP
'//</summary>
Function IsObjInstalled(strClassString)
On Error Resume Next
IsObjInstalled = False
Err = 0
Dim xTestObj
Set xTestObj = Server.CreateObject(strClassString)
If 0 = Err Then
If AspHttpOpen=1 Then
IsObjInstalled = True
'Response.write "当前组件 ASPHTTP"
Else
IsObjInstalled = False
'Response.write "当前组件 XMLHTTP"
End If
Else
IsObjInstalled = False
'Response.write "当前组件 XMLHTTP"
End If
Set xTestObj = Nothing
Err = 0
End Function
Sub Error()
response.write "<BR> 抓取不到数据-可能是因为网络原因不能访问站点<BR><a
href=javascript:location.reload();>重试</a>"
response.end
End Sub
%>
调用方法:
<%
response.write alexa("http://blog.sina.com.cn/u/1086421675")
%>
<%=alexa("http://blog.sina.com.cn/u/1086421675")%>
相关文章推荐
- SolrCloud Wiki翻译(1) Getting Started with SolrCloud
- 一行代码搞定所有屏幕适配
- Spring+Mybatis 多数据源配置
- ZWK so lazy
- android 网络运营商的名字显示规则(锁定屏幕,下拉列表)
- MapReduce编程
- python类型比较的方式
- linux下svn不能连接Windows SVN服务器
- startup migrate、startup upgrade区别以及Oracle打补丁集文档(转)
- 卸载Linux上默认的JDK
- 去除中国菜刀密码的方法
- 几种常见的动画
- HTML特殊字符过滤器
- [***]在Unity 脚本中的作用 C#
- JavaScript var关键字、变量的状态、异常处理、命名规范等介绍
- _itoa atoi、atof、itoa、itow _itoa_s 类型转换使用说明
- jquery实现网页自动添加必填项图标和日期自动填充
- 编译原理 龙书 第二章 一个简单的算术式(+,-)翻译器实现
- JavaScript function函数种类
- vs2010解决提示“一个或多个多重定义的符号“这种错误