您的位置:首页 > 编程语言 > Python开发

python爬取bilibili番剧图片

2016-06-24 15:31 537 查看
# -*- coding:utf-8 -*-
import urllib
import urllib2
import re
import time
import os
import sys
from bs4 import BeautifulSoup

# Ask the user for the inclusive range of page numbers to crawl.
hello="请输入首位置与末位置(1~999)"
print(hello.decode("utf-8"))
# raw_input() returns the typed text as-is; Python 2's input() would eval()
# it and execute whatever expression the user typed.
page_start = int(raw_input("page_start:"))
page_end = int(raw_input("page_end:"))
start = page_start
end = page_end

# All output goes into a "bilibili" folder under the current working directory.
path = os.getcwd()
new_path = os.path.join(path, 'bilibili')
if not os.path.isdir(new_path):
    os.mkdir(new_path)
# os.path.join picks the platform's separator instead of a hard-coded "\",
# so the list file lands inside new_path on every OS.
file_url = os.path.join(
    new_path,
    "AnimeList" + str(page_start) + "_" + str(page_end) + ".txt")
# Text file collecting one "title ID" line per page; closed by data.close()
# at the end of the script.
data = open(file_url, 'w')

def GetPageInfo(page):
    """Download the cover image and record the title of one bangumi page.

    Fetches ``http://bangumi.bilibili.com/anime/3<page>``, saves the image
    found inside the ``bangumi-preview`` div as ``3<page>.jpg`` in the
    module-level ``new_path`` directory, and appends the page title followed
    by its ID as one line to the module-level ``data`` file.

    Args:
        page: Page number; it is appended to the fixed "3" prefix to form
            the anime ID used in the URL, the image name and the list entry.

    Raises:
        urllib2.URLError: If either HTTP request fails.
        AttributeError: If the expected ``div``, ``img`` or ``h1`` element
            is missing from the page (``find`` returned ``None``).
    """
    anime_id = "3" + str(page)
    url = 'http://bangumi.bilibili.com/anime/' + anime_id
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    request = urllib2.Request(url, headers=headers)
    page_conn = urllib2.urlopen(request)
    try:
        response = page_conn.read().decode("utf-8")
    finally:
        # Release the connection even when read/decode fails.
        page_conn.close()
    soup = BeautifulSoup(response, "html.parser")

    # os.path.join instead of a hard-coded "\\" keeps the path portable.
    pic_name = os.path.join(new_path, anime_id + ".jpg")
    div = soup.find("div", {'class': "bangumi-preview"})
    pic = div.find("img").get('src')
    pic_conn = urllib2.urlopen(pic)
    try:
        getpic = pic_conn.read()
    finally:
        pic_conn.close()
    with open(pic_name, 'wb') as code:
        code.write(getpic)

    # The list entry is written only after the image was saved successfully.
    title = soup.find("h1", {'class': "info-title"}).get_text()
    data.write(title.encode('utf-8'))
    info = " ID:" + anime_id + "  \n"
    data.write(info)

# Crawl every page in [start, end]. A page that fails is reported with an
# "x" marker and the crawl moves on to the next one.
count = start
try:
    while count <= end:
        try:
            GetPageInfo(count)
        except Exception:
            # Exception rather than a bare except, so Ctrl-C and SystemExit
            # still stop the whole run instead of skipping a single page.
            print(str(count) + "x")
        else:
            print(str(count) + "get")
        count = count + 1
finally:
    # Close the list file even if the crawl is interrupted.
    data.close()
ts="已完成"
print(ts.decode("utf-8"))
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python utf-8 图片