您的位置:首页 > 其它

远程桌面强制踢用户下线

2015-06-10 14:43 357 查看
网络上有很多文章介绍如何自动抓取网站的Alexa排名,但是仔细一看发现抓取到的数据(Alexa排名

数值)被Alexa加入了很多干扰元素,如果只是要将数据(Alexa排名数值)显示在页面倒没有什么问题

,若是要对数据进行处理比如将两个网站的排名数值进行比较或者将网站排名数值存入数据库供日后调

用,则要对抓取到的数据进行适当的处理。

以下是本人结合其他网友提供的代码,对抓取到的数据进行处理后获得干干净净的数值的方法。核心

函数代码如下:
<%

'// alexa 世界排名的查询页面为:http://www.alexa.com/data/details/traffic_details?q=&Url=

'// 以下函数抓取到含有干扰元素的数据并通过函数对数据进行处理,获得干干净净的Alexa排名数值

Function alexa(str)

url="http://www.alexa.com/data/details/traffic_details?q=&url="&str

strs=str

If IsObjInstalled("AspHTTP.Conn")=true Then

str= getaspHTTPPage(url)

else

str= getHTTPPage(url)

End if

if str="" then

Call Error()

else

str_=str

str1=""

set reg=new Regexp

reg.Multiline=True

reg.Global=True

reg.IgnoreCase=true

str_top="<!--Did you know"

str_bottom="</span>"

reg.Pattern=""&str_top&"((.|\n)*?)"&str_bottom&""

Set matches = reg.execute(str_)

str1=""

For Each match1 in matches

str1=str1&match1.Value&"***"

Next

Set matches = Nothing

Set reg = Nothing

IF str1 <> "" Then

str1 = Replace(str1,"<!--Did you know? Alexa offers this data programmatically.

Visit http://webservices.amazon.com/ for more information about the Alexa Web Information

Service.-->","")

str1 = Replace(str1,"</span>","")

Str_11=split(str1,"<div class=""borderBottom""></div>")

str1 = Str_11(0)

Str_11 = split(str1,"***")

str1_Pan = Str_11(0)

End If

set reg=new Regexp

reg.Multiline=True

reg.Global=True

reg.IgnoreCase=true

str_top="<td class=""traffic"">"

str_bottom="</td>"

reg.Pattern=""&str_top&"((.|\n)*?)"&str_bottom&""

Set matches = reg.execute(str_)

str1=""

For Each match1 in matches

str1=str1&match1.Value&"***"

Next

Set matches = Nothing

Set reg = Nothing

IF str1 <> "" Then

Str_11=split(str1,"***")

End If

End if

'************************************

'************************************

alexa=getcorrectvalue(str1_Pan)

'************************************

'************************************

End Function

'************************************

'此功能函数去除干扰元素

'************************************

function getcorrectvalue(source)

source="|"+source+"|"

while InStr(source,"<")>0

thestart = InStr(source, "<")

theend = InStr(source, ">")

source = mid(source,1,thestart-1)+right(source,(len(source)-theend))

wend

source=replace(source,"|","")

source=replace(source,",","")

getcorrectvalue=source

end function

'************************************

'************************************

'// <summary>

'// 采用 Microsoft.XMLHTTP 组件采集数据

'// </summary>

Function getHTTPPage(url)

on error resume next

dim http

set http=Server.createobject("Microsoft.XMLHTTP")

Http.open "GET",url,false

Http.send()

if Http.readystate<>4 then

exit function

end if

getHTTPPage=bytes2BSTR(Http.responseBody)

set http=nothing

if err.number<>0 then err.Clear

End function

'// <summary>

'// 采用 ADODB.Stream 处理采集到的数据,把二进制的文件转成文本字符

'// </summary>

Function Bytes2bStr(vin)

Dim BytesStream,StringReturn

Set BytesStream = Server.CreateObject("ADODB.Stream")

BytesStream.Type = 2

BytesStream.Open

BytesStream.WriteText vin

BytesStream.Position = 0

BytesStream.Charset = "GB2312"

BytesStream.Position = 2

StringReturn =BytesStream.ReadText

BytesStream.close

Set BytesStream = Nothing

Bytes2bStr = StringReturn

End Function

'// <summary>

'// 采用 AspHTTP.Conn 组件采集数据

'// </summary>

Function getaspHTTPPage(url)

if url="" then

exit function

end if

Set HttpObj = Server.CreateObject("AspHTTP.Conn")

'设置代理服务器,通过代理上网的用户需要设置此选项

If ProxyIP=1 Then

HttpObj.Proxy="192.168.5.254:808"

end if

HTTPObj.TimeOut = 45

HttpObj.Url = url

HttpObj.RequestMethod = "GET"

getaspHTTPPage = HttpObj.GetURL

set HttpObj=nothing

End function

'//<summary>

'//检查组件,采用xmlhttp抓取网页还是AspHTTP

'//</summary>

Function IsObjInstalled(strClassString)

On Error Resume Next

IsObjInstalled = False

Err = 0

Dim xTestObj

Set xTestObj = Server.CreateObject(strClassString)

If 0 = Err Then

If AspHttpOpen=1 Then

IsObjInstalled = True

'Response.write "当前组件 ASPHTTP"

Else

IsObjInstalled = False

'Response.write "当前组件 XMLHTTP"

End If

Else

IsObjInstalled = False

'Response.write "当前组件 XMLHTTP"

End If

Set xTestObj = Nothing

Err = 0

End Function

Sub Error()

response.write "<BR> 抓取不到数据-可能是因为网络原因不能访问站点<BR><a

href=javascript:location.reload();>重试</a>"

response.end

End Sub

%>

调用方法:

<%

response.write alexa("http://blog.sina.com.cn/u/1086421675")

%>

<%=alexa("http://blog.sina.com.cn/u/1086421675")%>
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: