python爬虫设计刷博客访问量(刷访问量,赞,爬取图片)
2016-06-10 18:20
519 查看
需要准备的工具:
安装python软件,下载地址:https://www.python.org/
Fiddler抓包软件:http://blog.csdn.net/qq_21792169/article/details/51628123
刷博客访问量的原理是:打开一次网页博客访问量就增加一次。(新浪,搜狐等博客满足这个要求)
count.py
<span style="font-size:18px;">import webbrowser as web
import time
import os
import random
count = random.randint(1,2)
j=0
while j<count:
i=0
while i<=8 :
web.open_new_tab('http://blog.sina.com.cn/s/blog_552d7c620100aguu.html') #网址替换这里
i=i+1
time.sleep(3) #这个时间根据自己电脑处理速度设置,单位是s
else:
time.sleep(10) <span style="font-family: Arial, Helvetica, sans-serif;">#这个时间根据自己电脑处理速度设置,单位是s</span>
os.system('taskkill /F /IM chrome.exe') #google浏览器,其他的更换下就行
#print 'time webbrower closed'
j=j+1
</span>
刷赞就需要用Fiddler来获取Request header数据,比如Cookie,Host,Referer,User-Agent等
sina.py
<span style="font-size:18px;">import urllib.request
import sys
points = 2 #how count ?
if len(sys.argv) > 1:
points = int(sys.argv[1])
aritcleUrl = ''
point_header = {
'Accept' : '*/*',
'Cookie' : '',#填你的cookie信息
'Host':'', #主机
'Referer' : '',
'User-Agent' : 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36',
}
for i in range(points):
point_request = urllib.request.Request(aritcleUrl, headers = point_header)
point_response = urllib.request.urlopen(point_request)
</span>
上面的header头通过抓包数据可以获取,这里只是提供思路。
爬取网页上的图片:
getimg.py
#coding=utf-8
import urllib
import urllib2
import re
def getHtml(url, timeout=30):
    """Fetch *url* and return the raw response body.

    Args:
        url: Absolute URL of the page to download.
        timeout: Seconds to wait on the socket before giving up. Without a
            timeout a stalled server would block the script forever.

    Returns:
        The undecoded body exactly as returned by ``read()``.

    Raises:
        urllib2.URLError: If the request fails or times out.
    """
    # Local import so this block does not depend on edits to the file header.
    from contextlib import closing

    headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
    req = urllib2.Request(url, headers=headers)
    # urllib2 responses are not context managers in Python 2, so wrap the
    # response in closing() to release the socket even if read() raises.
    with closing(urllib2.urlopen(req, timeout=timeout)) as page:
        return page.read()
def getImg(html, download=True):
    """Extract image URLs from *html* and optionally download them.

    Only absolute http(s) URLs inside a ``src="..."`` attribute that end in
    .jpg, .jpeg or .png are returned. The match may not cross a double
    quote, so it cannot run into a following attribute.

    Args:
        html: Page source to scan.
        download: When true (the default), each image is saved in the
            current directory as ``0.jpg``, ``1.jpg``, ... in match order.
            Pass False to only collect the URLs.

    Returns:
        The list of matched image URLs, in document order.
    """
    imgre = re.compile(r'src="(https?://[^"]+?\.(?:jpe?g|png))"', re.I)
    imglist = imgre.findall(html)
    print(imglist)
    if download:
        for index, imgurl in enumerate(imglist):
            # NOTE: files keep the historical "<n>.jpg" name regardless of
            # the real image type.
            urllib.urlretrieve(imgurl, '%s.jpg' % index)
    return imglist
html = getHtml("http://pic.yxdown.com/list/0_0_1.html")
print getImg(html)
1、 .*? 三个符号组合起来表示非贪婪匹配:匹配任意多个任意字符(默认不含换行符),并且尽可能少地匹配。
2、 \. 是将 ‘.’ 转义,代表的就是HTML中的字面量 “.”。
3、 () 表示分组:我们只取括号中匹配到的部分,括号之外的内容会被省略。
爬取CSDN的访问量csdn.py
这个正则表达式写的不是很完整,如果有置顶文章的话,抓取到的文章标题就会多出<font color="red">[置顶]</font>,所以这里应该添加一个判断语句,读者可以自行尝试。
手动生成IP列表creat_ip:
#-*- coding:utf-8 -*-
#!/usr/bin/python
import time
time_start = time.time()
def get_ip(number='10', start='1.1.1.1', path='ip_list.txt'):
    """Write consecutive IPv4 addresses, one per line, to a text file.

    Addresses are generated in ascending order beginning at *start*. The
    last line has no trailing newline. Generation stops early if
    255.255.255.255 is reached before *number* addresses were produced.

    Args:
        number: How many addresses to write. May be an int or a numeric
            string (the historical default is the string '10').
        start: Dotted-quad address to begin with (inclusive).
        path: Output file; it is overwritten.

    Returns:
        The number of addresses actually written.

    Raises:
        ValueError: If *number* is not numeric or *start* is not a valid
            dotted-quad IPv4 address.
    """
    count = max(int(number), 0)
    octets = [int(part) for part in start.split('.')]
    if len(octets) != 4 or not all(0 <= octet <= 255 for octet in octets):
        raise ValueError('invalid IPv4 start address: %r' % (start,))

    # Treat the address as one 32-bit integer so carrying between octets is
    # plain arithmetic instead of four nested loops with manual resets.
    first = 0
    for octet in octets:
        first = first * 256 + octet
    stop = min(first + count, 256 ** 4)

    lines = (
        '%d.%d.%d.%d' % (value >> 24, (value >> 16) & 255, (value >> 8) & 255, value & 255)
        for value in range(first, stop)
    )
    # The with-block guarantees the file is closed on every exit path.
    with open(path, 'w') as out:
        out.write('\n'.join(lines))
    return stop - first
get_ip(100000,'101.23.228.102')
time_end = time.time()
time = time_end - time_start
print '耗时%s秒' %time
grab_ip.py 抓取代理IP网站,读取出IP和端口号,具体怎么使用这些IP和端口看个人实际情况。
#!/usr/bin/python
#-*- coding:utf-8 -*-
import urllib,time,re,logging
import urllib
import urllib2
import re
import time
import os
import random
url = 'http://www.xicidaili.com/'
csdn_url='http://blog.csdn.net/qq_21792169/article/details/51628142'
header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
USER_AGENT_LIST = [
'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
'Opera/9.25 (Windows NT 5.1; U; en)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9'
]
def getProxyHtml(url):
headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
req = urllib2.Request(url,headers=headers)
page = urllib2.urlopen(req);
html = page.read()
return html
def ipPortGain(html):
ip_re = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).+\n.+>(\d{1,5})<')
ip_port = re.findall(ip_re,html)
return ip_port
def proxyIP(ip_port):
#to ip deal with['221.238.28.158:8081', '183.62.62.188:9999']格式
proxyIP = []
for i in range( 0,len(ip_port)):
proxyIP.append( ':'.join(ip_port[i]))
logging.info(proxyIP[i])
#to ip deal with[{'http': 'http://221.238.28.158:8081'}, {'http': 'http://183.62.62.188:9999'}]格式
proxy_list = []
for i in range( 0,len(proxyIP)):
a0 = 'http://%s'%proxyIP[i]
a1 = { 'http ':'%s'%a0}
proxy_list.append(a1)
return proxy_list
def csdn_Brush(ip):
print ip
#use ping verify ip if alive
def ping_ip(ip):
ping_cmd = 'ping -c 2 -w 5 %s' % ip
ping_result = os.popen(ping_cmd).read()
print 'ping_cmd : %s, ping_result : %r' % (ping_cmd, ping_result)
if ping_result.find('100% packet loss') < 0:
print 'ping %s ok' % ip
return True
else:
print 'ping %s fail' % ip
fh = open('proxy_ip.txt','w')
html=getProxyHtml(url)
ip_port=ipPortGain(html)
proxy_list=proxyIP(ip_port)
for proxy_ip in proxy_list:
ping_ip(proxy_ip)
fh.write('%s\n'%(proxy_ip,))
res=urllib.urlopen(csdn_url,proxies=proxy_ip).read()#这里可以添加一个for循环,把博文所以的文章都用这个IP请求一次,然后博文的访问量=IP*博文总数*进程数
这样一个完整的刷访问量脚本就写成功了,这样一个脚本运行一次只是一个进程,一个进程出现我问题,整个程序也就无法执行下去,这里写一个C语言脚本程序。
#include<stdlib.h>
int main(int argc,char **argv)
{
while(1)
{
char *cmd="python /home/book/csdn.py"; /* 这里是CSDN刷访问量的Python脚本程序路径 */
system(cmd); /* 这里是执行一个进程,一个进程出现问题,立马开启新的进程,一个进程运行脚本的时间大约是半个小时,所以CSDN的时间检测也就无效了,一天访问量=IP*博文总数*24*2*/
return 0;
}
}
csdn.py
import urllib2
import thread
import time
points = 200000
webstring='http://blog.csdn.net/qq_21792169/article/details/51461098'
aritcleUrl = webstring
point_header = {
'Accept' : '*/*',
'Cookie' : 'Cookie: uuid_tt_dd=225004857698634670_20160708; __message_district_code=000000; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1467989598; lzstat_uv=34579586981120259917|3400560@2942182; _ga=GA1.2.985440903.1467988379; _gat=1; UserName=qq_21792169; UserInfo=4tvvd2LURRttsNBUVWn7g2HWjoUBNOvTYr%2FKJInp6knc%2FWnL7JpBPoWkIFnTu2DLyKyad7FO%2BB3GziEIYWMLk1ekYH0Y04BoGaP4w%2BMUxAd%2B8dmThjsZSsUkBwpSU71HgyVO5RU2A8k1suY%2BaE531Q%3D%3D; UserNick=%E7%BD%91%E7%BB%9C%E4%BA%BAVS%E7%81%B0%E9%B8%BD%E5%AD%90; AU=44A; UD=%E6%9C%9D%E4%BD%9C%E4%B8%80%E5%90%8D%E4%BC%98%E7%A7%80%E7%9A%84%E5%B5%8C%E5%85%A5%E5%BC%8F%E5%BC%80%E5%8F%91%E5%B7%A5%E7%A8%8B%E5%B8%88%E8%80%8C%E5%A5%8B%E6%96%97%EF%BC%8CCSDN%E5%8D%9A%E5%AE%A2%E5%B0%86%E8%AE%B0%E5%BD%95%E6%88%91%E6%88%90%E9%95%BF%E7%9A%84%E7%82%B9%E7%82%B9%E6%BB%B4%E6%BB%B4%E3%80%82; UN=qq_21792169; UE="1549043310@qq.com"; BT=1468046002179; access-token=99302955-285c-4600-8d15-9533eff8f3a9; dc_tos=oa1bjr; dc_session_id=1468046007438; __message_sys_msg_id=0; __message_gu_msg_id=0; __message_cnel_msg_id=0; __message_in_school=0',
'Host':'dc.csdn.net',
'Referer' : webstring,
# 'Referer' : 'http://blog.csdn.net/qq_21792169/article/details/51858371',
'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
}
def test():
for i in range(points):
req = urllib2.Request(aritcleUrl,headers=point_header)
page = urllib2.urlopen(req);
print i
try:
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
except:
print "Error: unable to start thread"
while 1:
pass
#html = page.read()
#print html
csdn_new.py
import urllib2
import thread
import re
points = 1
href="href.html"
cnt=0
point_header = {
'Accept' : '*/*',
'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
}
def test1():
input_file = open(href,"r");
html = input_file.read();
reg = r'href="(http://blog.csdn.net/qq_21792169/article/details/.+?)">'
imgre = re.compile(reg)
imglist = re.findall(imgre,html)
x = 0
for imgurl in imglist:
x=x+1
if(x>cnt):
print "blog num %03d :%s"%(x,imgurl)
for i in range(points):
req = urllib2.Request(imgurl,headers=point_header)
urllib2.urlopen(req);
try:
thread.start_new_thread( test1,())
except:
print "Error: unable to start thread"
while 1:
pass
href.html 下面这种格式
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50629515">手把手教你怎么创建自己的网站</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50596464">虚拟机 开发板 PC机 三者之间不能ping通的各种原因分析</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50503279">博客专栏HTML语言编写详解</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50465363">Linux驱动静态编译和动态编译方法详解</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50448639">多文件夹下编写Makefile详解</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50436089">结构体中定义函数指针</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50426701">交叉编译参数 -I -L -l 详解</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50420937">智能家居网络系统的设计(一)</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50418560">智能家居网络系统设计(二)</a></li><p></p>
最后一个比较可靠的办法:抓取肉鸡,执行我们的脚本程序,安全,可靠。
![](http://static.blog.csdn.net/xheditor/xheditor_emot/default/smile.gif)
![](http://static.blog.csdn.net/xheditor/xheditor_emot/default/smile.gif)
自动发送QQ消息:qq.vbs(复制你要发送的字,打开QQ对话框,点击这个文件)
Set WshShell= WScript.Createobject("WScript.Shell")
for i=1 to 100
WScript.Sleep 1000
WshShell.SendKeys"^v"
WshShell.SendKeys "%s"
next
推荐文章:http://blog.csdn.net/qq_21792169/article/details/5162702
安装python软件,下载地址:https://www.python.org/
Fiddler抓包软件:http://blog.csdn.net/qq_21792169/article/details/51628123
刷博客访问量的原理是:打开一次网页博客访问量就增加一次。(新浪,搜狐等博客满足这个要求)
count.py
<span style="font-size:18px;">import webbrowser as web
import time
import os
import random
count = random.randint(1,2)
j=0
while j<count:
i=0
while i<=8 :
web.open_new_tab('http://blog.sina.com.cn/s/blog_552d7c620100aguu.html') #网址替换这里
i=i+1
time.sleep(3) #这个时间根据自己电脑处理速度设置,单位是s
else:
time.sleep(10) <span style="font-family: Arial, Helvetica, sans-serif;">#这个时间根据自己电脑处理速度设置,单位是s</span>
os.system('taskkill /F /IM chrome.exe') #google浏览器,其他的更换下就行
#print 'time webbrower closed'
j=j+1
</span>
刷赞就需要用Fiddler来获取Request header数据,比如Cookie,Host,Referer,User-Agent等
sina.py
<span style="font-size:18px;">import urllib.request
import sys
points = 2 #how count ?
if len(sys.argv) > 1:
points = int(sys.argv[1])
aritcleUrl = ''
point_header = {
'Accept' : '*/*',
'Cookie' : '',#填你的cookie信息
'Host':'', #主机
'Referer' : '',
'User-Agent' : 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36',
}
for i in range(points):
point_request = urllib.request.Request(aritcleUrl, headers = point_header)
point_response = urllib.request.urlopen(point_request)
</span>
上面的header头通过抓包数据可以获取,这里只是提供思路。
爬取网页上的图片:
getimg.py
#coding=utf-8
import urllib
import urllib2
import re
def getHtml(url, timeout=30):
    """Fetch *url* and return the raw response body.

    Args:
        url: Absolute URL of the page to download.
        timeout: Seconds to wait on the socket before giving up. Without a
            timeout a stalled server would block the script forever.

    Returns:
        The undecoded body exactly as returned by ``read()``.

    Raises:
        urllib2.URLError: If the request fails or times out.
    """
    # Local import so this block does not depend on edits to the file header.
    from contextlib import closing

    headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
    req = urllib2.Request(url, headers=headers)
    # urllib2 responses are not context managers in Python 2, so wrap the
    # response in closing() to release the socket even if read() raises.
    with closing(urllib2.urlopen(req, timeout=timeout)) as page:
        return page.read()
def getImg(html, download=True):
    """Extract image URLs from *html* and optionally download them.

    Only absolute http(s) URLs inside a ``src="..."`` attribute that end in
    .jpg, .jpeg or .png are returned. The match may not cross a double
    quote, so it cannot run into a following attribute.

    Args:
        html: Page source to scan.
        download: When true (the default), each image is saved in the
            current directory as ``0.jpg``, ``1.jpg``, ... in match order.
            Pass False to only collect the URLs.

    Returns:
        The list of matched image URLs, in document order.
    """
    imgre = re.compile(r'src="(https?://[^"]+?\.(?:jpe?g|png))"', re.I)
    imglist = imgre.findall(html)
    print(imglist)
    if download:
        for index, imgurl in enumerate(imglist):
            # NOTE: files keep the historical "<n>.jpg" name regardless of
            # the real image type.
            urllib.urlretrieve(imgurl, '%s.jpg' % index)
    return imglist
html = getHtml("http://pic.yxdown.com/list/0_0_1.html")
print getImg(html)
1、 .*? 三个符号组合起来表示非贪婪匹配:匹配任意多个任意字符(默认不含换行符),并且尽可能少地匹配。
2、 \. 是将 ‘.’ 转义,代表的就是HTML中的字面量 “.”。
3、 () 表示分组:我们只取括号中匹配到的部分,括号之外的内容会被省略。
爬取CSDN的访问量csdn.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Lists every article of a CSDN account with its view count, printing each
# entry and also writing it to blogs.txt. Read-only: it only fetches the
# public article-list pages.
import urllib2
import re

# Number of the blog-list page currently being read.
page_num = 1
# Truthy while the current page is not the last list page.
notLast = 1
fs = open('blogs.txt','w')
try:
    account = str(raw_input('Input csdn Account:'))
    # Home page of the account.
    baseUrl = 'http://blog.csdn.net/'+account
    # Send a browser User-Agent; the original author notes that CSDN
    # rejects requests without one.
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent':user_agent}
    while notLast:
        # Append the page number to build the list-page URL.
        myUrl = baseUrl+'/article/list/'+str(page_num)
        req = urllib2.Request(myUrl,headers=headers)
        myResponse = urllib2.urlopen(req)
        try:
            myPage = myResponse.read()
        finally:
            myResponse.close()
        # The page has a "last page" (尾页) link unless it is the final
        # one, so an empty result ends the loop after this iteration.
        notLast = re.findall('<a href=".*?">尾页</a>',myPage,re.S)
        print('-----------------------------第%d页---------------------------------' % (page_num,))
        fs.write('--------------------------------第%d页--------------------------------\n' % page_num)
        # Article links.
        title_href = re.findall('<span class="link_title"><a href="(.*?)">',myPage,re.S)
        titleListhref = [str(item).strip() for item in title_href]
        # Article titles.
        title = re.findall('<span class="link_title"><a href=".*?">(.*?)</a></span>',myPage,re.S)
        titleList = [str(item).strip() for item in title]
        # View counts.
        view = re.findall(r'<span class="link_view".*?><a href=".*?" title="阅读次数">阅读</a>\((.*?)\)</span>',myPage,re.S)
        viewList = [str(item).strip() for item in view]
        # Emit one line per article. zip() stops at the shortest list, so a
        # page where one pattern matched fewer items cannot raise IndexError.
        for views, link, name in zip(viewList, titleListhref, titleList):
            print('访问量:%s href:%s 标题:%s' % (views.zfill(4), link, name))
            fs.write('访问量:%s\t\thref:%s\t\t标题:%s\n' % (views.zfill(4), link, name))
        # Move on to the next list page.
        page_num = page_num + 1
finally:
    # Close the report file even if a request fails part-way through.
    fs.close()
这个正则表达式写的不是很完整,如果有置顶文章的话,抓取到的文章标题就会多出<font color="red">[置顶]</font>,所以这里应该添加一个判断语句,读者可以自行尝试。
手动生成IP列表creat_ip:
#-*- coding:utf-8 -*-
#!/usr/bin/python
import time
time_start = time.time()
def get_ip(number='10', start='1.1.1.1', path='ip_list.txt'):
    """Write consecutive IPv4 addresses, one per line, to a text file.

    Addresses are generated in ascending order beginning at *start*. The
    last line has no trailing newline. Generation stops early if
    255.255.255.255 is reached before *number* addresses were produced.

    Args:
        number: How many addresses to write. May be an int or a numeric
            string (the historical default is the string '10').
        start: Dotted-quad address to begin with (inclusive).
        path: Output file; it is overwritten.

    Returns:
        The number of addresses actually written.

    Raises:
        ValueError: If *number* is not numeric or *start* is not a valid
            dotted-quad IPv4 address.
    """
    count = max(int(number), 0)
    octets = [int(part) for part in start.split('.')]
    if len(octets) != 4 or not all(0 <= octet <= 255 for octet in octets):
        raise ValueError('invalid IPv4 start address: %r' % (start,))

    # Treat the address as one 32-bit integer so carrying between octets is
    # plain arithmetic instead of four nested loops with manual resets.
    first = 0
    for octet in octets:
        first = first * 256 + octet
    stop = min(first + count, 256 ** 4)

    lines = (
        '%d.%d.%d.%d' % (value >> 24, (value >> 16) & 255, (value >> 8) & 255, value & 255)
        for value in range(first, stop)
    )
    # The with-block guarantees the file is closed on every exit path.
    with open(path, 'w') as out:
        out.write('\n'.join(lines))
    return stop - first
get_ip(100000,'101.23.228.102')
time_end = time.time()
time = time_end - time_start
print '耗时%s秒' %time
grab_ip.py 抓取代理IP网站,读取出IP和端口号,具体怎么使用这些IP和端口看个人实际情况。
#!/usr/bin/python
#-*- coding:utf-8 -*-
import urllib,time,re,logging
import urllib
import urllib2
import re
import time
import os
import random
url = 'http://www.xicidaili.com/'
csdn_url='http://blog.csdn.net/qq_21792169/article/details/51628142'
header = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
USER_AGENT_LIST = [
'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
'Opera/9.25 (Windows NT 5.1; U; en)',
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9'
]
def getProxyHtml(url):
headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
req = urllib2.Request(url,headers=headers)
page = urllib2.urlopen(req);
html = page.read()
return html
def ipPortGain(html):
ip_re = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).+\n.+>(\d{1,5})<')
ip_port = re.findall(ip_re,html)
return ip_port
def proxyIP(ip_port):
#to ip deal with['221.238.28.158:8081', '183.62.62.188:9999']格式
proxyIP = []
for i in range( 0,len(ip_port)):
proxyIP.append( ':'.join(ip_port[i]))
logging.info(proxyIP[i])
#to ip deal with[{'http': 'http://221.238.28.158:8081'}, {'http': 'http://183.62.62.188:9999'}]格式
proxy_list = []
for i in range( 0,len(proxyIP)):
a0 = 'http://%s'%proxyIP[i]
a1 = { 'http ':'%s'%a0}
proxy_list.append(a1)
return proxy_list
def csdn_Brush(ip):
print ip
#use ping verify ip if alive
def ping_ip(ip):
ping_cmd = 'ping -c 2 -w 5 %s' % ip
ping_result = os.popen(ping_cmd).read()
print 'ping_cmd : %s, ping_result : %r' % (ping_cmd, ping_result)
if ping_result.find('100% packet loss') < 0:
print 'ping %s ok' % ip
return True
else:
print 'ping %s fail' % ip
fh = open('proxy_ip.txt','w')
html=getProxyHtml(url)
ip_port=ipPortGain(html)
proxy_list=proxyIP(ip_port)
for proxy_ip in proxy_list:
ping_ip(proxy_ip)
fh.write('%s\n'%(proxy_ip,))
res=urllib.urlopen(csdn_url,proxies=proxy_ip).read()#这里可以添加一个for循环,把博文所以的文章都用这个IP请求一次,然后博文的访问量=IP*博文总数*进程数
(有时间间隔,大约是半个小时,CSDN设置时间检测,所以我们配合上C语言) fh.close()
这样一个完整的刷访问量脚本就写成功了,这样一个脚本运行一次只是一个进程,一个进程出现我问题,整个程序也就无法执行下去,这里写一个C语言脚本程序。
#include<stdlib.h>
int main(int argc,char **argv)
{
while(1)
{
char *cmd="python /home/book/csdn.py"; /* 这里是CSDN刷访问量的Python脚本程序路径 */
system(cmd); /* 这里是执行一个进程,一个进程出现问题,立马开启新的进程,一个进程运行脚本的时间大约是半个小时,所以CSDN的时间检测也就无效了,一天访问量=IP*博文总数*24*2*/
return 0;
}
}
csdn.py
import urllib2
import thread
import time
points = 200000
webstring='http://blog.csdn.net/qq_21792169/article/details/51461098'
aritcleUrl = webstring
point_header = {
'Accept' : '*/*',
'Cookie' : 'Cookie: uuid_tt_dd=225004857698634670_20160708; __message_district_code=000000; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1467989598; lzstat_uv=34579586981120259917|3400560@2942182; _ga=GA1.2.985440903.1467988379; _gat=1; UserName=qq_21792169; UserInfo=4tvvd2LURRttsNBUVWn7g2HWjoUBNOvTYr%2FKJInp6knc%2FWnL7JpBPoWkIFnTu2DLyKyad7FO%2BB3GziEIYWMLk1ekYH0Y04BoGaP4w%2BMUxAd%2B8dmThjsZSsUkBwpSU71HgyVO5RU2A8k1suY%2BaE531Q%3D%3D; UserNick=%E7%BD%91%E7%BB%9C%E4%BA%BAVS%E7%81%B0%E9%B8%BD%E5%AD%90; AU=44A; UD=%E6%9C%9D%E4%BD%9C%E4%B8%80%E5%90%8D%E4%BC%98%E7%A7%80%E7%9A%84%E5%B5%8C%E5%85%A5%E5%BC%8F%E5%BC%80%E5%8F%91%E5%B7%A5%E7%A8%8B%E5%B8%88%E8%80%8C%E5%A5%8B%E6%96%97%EF%BC%8CCSDN%E5%8D%9A%E5%AE%A2%E5%B0%86%E8%AE%B0%E5%BD%95%E6%88%91%E6%88%90%E9%95%BF%E7%9A%84%E7%82%B9%E7%82%B9%E6%BB%B4%E6%BB%B4%E3%80%82; UN=qq_21792169; UE="1549043310@qq.com"; BT=1468046002179; access-token=99302955-285c-4600-8d15-9533eff8f3a9; dc_tos=oa1bjr; dc_session_id=1468046007438; __message_sys_msg_id=0; __message_gu_msg_id=0; __message_cnel_msg_id=0; __message_in_school=0',
'Host':'dc.csdn.net',
'Referer' : webstring,
# 'Referer' : 'http://blog.csdn.net/qq_21792169/article/details/51858371',
'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
}
def test():
for i in range(points):
req = urllib2.Request(aritcleUrl,headers=point_header)
page = urllib2.urlopen(req);
print i
try:
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
thread.start_new_thread( test,())
except:
print "Error: unable to start thread"
while 1:
pass
#html = page.read()
#print html
csdn_new.py
import urllib2
import thread
import re
points = 1
href="href.html"
cnt=0
point_header = {
'Accept' : '*/*',
'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
}
def test1():
input_file = open(href,"r");
html = input_file.read();
reg = r'href="(http://blog.csdn.net/qq_21792169/article/details/.+?)">'
imgre = re.compile(reg)
imglist = re.findall(imgre,html)
x = 0
for imgurl in imglist:
x=x+1
if(x>cnt):
print "blog num %03d :%s"%(x,imgurl)
for i in range(points):
req = urllib2.Request(imgurl,headers=point_header)
urllib2.urlopen(req);
try:
thread.start_new_thread( test1,())
except:
print "Error: unable to start thread"
while 1:
pass
href.html 下面这种格式
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50629515">手把手教你怎么创建自己的网站</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50596464">虚拟机 开发板 PC机 三者之间不能ping通的各种原因分析</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50503279">博客专栏HTML语言编写详解</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50465363">Linux驱动静态编译和动态编译方法详解</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50448639">多文件夹下编写Makefile详解</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50436089">结构体中定义函数指针</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50426701">交叉编译参数 -I -L -l 详解</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50420937">智能家居网络系统的设计(一)</a></li><p></p>
<li><a target="_blank" href="http://blog.csdn.net/qq_21792169/article/details/50418560">智能家居网络系统设计(二)</a></li><p></p>
最后一个比较可靠的办法:抓取肉鸡,执行我们的脚本程序,安全,可靠。
![](http://static.blog.csdn.net/xheditor/xheditor_emot/default/smile.gif)
![](http://static.blog.csdn.net/xheditor/xheditor_emot/default/smile.gif)
自动发送QQ消息:qq.vbs(复制你要发送的字,打开QQ对话框,点击这个文件)
Set WshShell= WScript.Createobject("WScript.Shell")
for i=1 to 100
WScript.Sleep 1000
WshShell.SendKeys"^v"
WshShell.SendKeys "%s"
next
推荐文章:http://blog.csdn.net/qq_21792169/article/details/5162702
相关文章推荐
- Ubuntu の Python-Web框架Django 环境安装 V1.1
- python文件和路径操作函数小结
- python-文件操作(1)
- Ubuntu の 多版本Python的安装管理与切换
- python轻量级爬虫学习笔记 之 urllib的应用
- 【python深度学习】theano环境搭建/安装
- python中判断小数的方法
- Python - 子类继承父类 和 Java有什么区别
- python《初次理解变量》
- 闭包的一个注意事项
- Python2.7.X 中文注释
- spearman学习
- python strip()函数
- Python--核心编程学习
- Python学习笔记-学生成绩考评方法
- python---基础回顾(爬虫)
- Python多进程multiprocessing使用示例
- python+selenium环境搭建
- Python
- Python项目之万能的XML