您的位置:首页 > 编程语言 > Python开发

python刷取CSDN博文访问量之三

2015-06-28 21:41 411 查看
python刷取CSDN博文访问量之三

作者:vpoet

注:这个系列我只贴代码,代码不加注释。有兴趣的读者自己读一读就能懂。纯属娱乐,望管理员高抬贵手。
若有转载,请务必注明来源。


#coding=utf-8
import webbrowser
import time
import urllib2
import re
import os
import thread
import threading
# Re-entrant lock that serialises access to tabcount, which every BlogFun
# call reads and updates.
mylock = threading.RLock()

# Counter bumped by BlogFun once per pass; when it exceeds 10, BlogFun
# force-kills chrome.exe and resets the counter to 1.
tabcount=1

def BlogFun(n,url,MaxVisitor):
    """Keep opening ``url`` until its displayed read count exceeds ``MaxVisitor``.

    Each pass opens the URL in a new browser window, downloads the page
    with urllib2 and extracts the read counter from the HTML.  The
    module-level ``tabcount`` is updated under ``mylock``; once it exceeds
    10, ``taskkill /F /IM chrome.exe`` is run and the counter restarts at 1.

    Args:
        n: Seconds to sleep between two passes.
        url: Address of the article page to open and poll.
        MaxVisitor: Threshold; the loop ends as soon as the page reports a
            read count greater than this value.

    Returns:
        None.  The function also returns when the read counter cannot be
        found in the downloaded page.
    """
    global tabcount
    # Compiled once; the pattern text is loop-invariant.
    visitcount = re.compile(r'<span class="link_view" title="阅读次数">(\d+)人阅读</span>')
    # One opener is enough for every request issued by this call.
    opener = urllib2.build_opener()

    while True:
        # "with" guarantees the lock is released even if the body raises.
        with mylock:
            if tabcount > 10:
                os.system('taskkill /F /IM chrome.exe')
                tabcount = 1
            else:
                tabcount = tabcount + 1

        webbrowser.open(url, new=1)

        request = urllib2.Request(url)
        request.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
        fblog = opener.open(request)
        try:
            htm = fblog.read()
        finally:
            # Release the connection instead of leaking one per pass.
            fblog.close()

        found = visitcount.search(htm)
        if found is None:
            # Counter markup missing: the threshold can never be evaluated,
            # so stop instead of raising IndexError or looping forever.
            break
        if int(found.group(1)) > MaxVisitor:
            break
        time.sleep(n)

if __name__=="__main__":
    # Script entry point: download the blog front page, collect the article
    # links and run one BlogFun worker thread per article.

    Domain = "http://blog.csdn.net"
    main_url = "http://blog.csdn.net/u013018721"
    # [^"]+ stops at the closing quote of the href; the former greedy (.+)
    # could run on to a later '">' on the same line.
    patt_article = r'<span class="link_title"><a href="([^"]+)">'

    Mainrequest = urllib2.Request(main_url)
    Mainrequest.add_header('User-Agent','Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6')
    opener = urllib2.build_opener()
    fMainblog = opener.open(Mainrequest)
    try:
        Mainhtml = fMainblog.read()
    finally:
        fMainblog.close()
    article_urls = re.findall(patt_article, Mainhtml)

    MaxVisitor = 300
    timedelay = 3

    workers = []
    for item in article_urls:
        # The captured hrefs are appended to Domain to form the full URL.
        Realurl = Domain + item
        worker = threading.Thread(target=BlogFun,
                                  args=(timedelay, Realurl, MaxVisitor))
        # Daemon threads do not keep the process alive after an interrupt.
        worker.daemon = True
        worker.start()
        workers.append(worker)

    # Wait for the workers: with thread.start_new_thread the main thread
    # reached the end of the script immediately, which normally ends the
    # process and the workers with it.  A timed join keeps the wait
    # interruptible with Ctrl+C.
    for worker in workers:
        while worker.is_alive():
            worker.join(1)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: