python3 爬取猫眼榜单top100(requests+beautifulsoup)
2018-01-04 15:36
806 查看
初学python,记录学习过程。
爬取的url:http://maoyan.com/board/4
共十页,第二页的url:http://maoyan.com/board/4?offset=10 以此类推
源码如下:
#猫眼电影TOP100
import requests
from requests.exceptions import RequestException
from bs4 import BeautifulSoup
def find_last(string, str):
    """Return the index of the last occurrence of *str* in *string*.

    Returns -1 when *str* never occurs.  The original version scanned
    forward with repeated ``find`` calls (O(n*m)); ``str.rfind`` already
    implements "last occurrence or -1" in C with identical semantics.

    NOTE(review): the second parameter shadows the ``str`` builtin; the
    name is kept only to preserve the call signature for keyword callers.
    """
    return string.rfind(str)
def get_html(url):
    """Fetch *url* and return the response body as text.

    Returns ``None`` on any network error or on a non-200 status code,
    so callers get a single "no page" signal for every failure mode.
    """
    try:
        # timeout added: requests.get() without a timeout can block forever
        # if the server never responds.
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.text
        return None
    except RequestException:
        return None
def parse_html(html):
    """Extract movie data from one board page.

    Returns four parallel lists: (names, casts, release_times, scores),
    one entry per movie on the page.
    """
    soup = BeautifulSoup(html, "html.parser")
    names = [node.a.string for node in soup.select(".movie-item-info > .name")]
    raw_casts = [node.string for node in soup.select(".movie-item-info > .star")]
    raw_dates = [node.string for node in soup.select(".movie-item-info > .releasetime")]
    whole_parts = [node.string for node in soup.select(".score > .integer")]
    frac_parts = [node.string for node in soup.select(".score > .fraction")]
    # Drop the "主演:" prefix (3 chars) and the trailing newline of the cast line.
    casts = [text[text.find('主演') + 3:text.rfind('\n')] for text in raw_casts]
    # Drop the "上映时间:" prefix (5 chars) of the release-time line.
    release_times = [text[text.find('上映时间') + 5:] for text in raw_dates]
    # The score markup splits each rating into integer and fractional halves.
    scores = [whole + frac for whole, frac in zip(whole_parts, frac_parts)]
    return names, casts, release_times, scores
def write_to_file(names, stars, times, scores, path='result.txt'):
    """Append one formatted line per movie to *path*.

    The four lists are consumed in lockstep; each output row holds the
    movie name, cast, release time and score in fixed-width columns.
    *path* defaults to ``'result.txt'`` to match the original behavior.
    """
    # "with" guarantees the handle is closed even if a write raises
    # (the original open()/close() pair leaked the file on error).
    with open(path, 'a', encoding='utf-8') as fp:
        for name, star, release, score in zip(names, stars, times, scores):
            fp.write('%-15s %-20s %-17s %-3s' % (name, star, release, score))
            fp.write('\n')
def main(offset):
    """Scrape one page of the top-100 board (selected by *offset*) and persist it."""
    page_url = 'http://maoyan.com/board/4?offset=' + str(offset)
    page = get_html(page_url)
    names, casts, dates, scores = parse_html(page)
    write_to_file(names, casts, dates, scores)
if __name__ == '__main__':
    # The board paginates in steps of ten: offset = 0, 10, ..., 90.
    for offset in range(0, 100, 10):
        main(offset)
结果如下:
爬取的url:http://maoyan.com/board/4
共十页,第二页的url:http://maoyan.com/board/4?offset=10 以此类推
源码如下:
#猫眼电影TOP100
import requests
from requests.exceptions import RequestException
from bs4 import BeautifulSoup
def find_last(string, str):
    """Return the index of the last occurrence of *str* in *string*.

    Returns -1 when *str* never occurs.  The original version scanned
    forward with repeated ``find`` calls (O(n*m)); ``str.rfind`` already
    implements "last occurrence or -1" in C with identical semantics.

    NOTE(review): the second parameter shadows the ``str`` builtin; the
    name is kept only to preserve the call signature for keyword callers.
    """
    return string.rfind(str)
def get_html(url):
    """Fetch *url* and return the response body as text.

    Returns ``None`` on any network error or on a non-200 status code,
    so callers get a single "no page" signal for every failure mode.
    """
    try:
        # timeout added: requests.get() without a timeout can block forever
        # if the server never responds.
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.text
        return None
    except RequestException:
        return None
def parse_html(html):
    """Extract movie data from one board page.

    Returns four parallel lists: (names, casts, release_times, scores),
    one entry per movie on the page.
    """
    soup = BeautifulSoup(html, "html.parser")
    names = [node.a.string for node in soup.select(".movie-item-info > .name")]
    raw_casts = [node.string for node in soup.select(".movie-item-info > .star")]
    raw_dates = [node.string for node in soup.select(".movie-item-info > .releasetime")]
    whole_parts = [node.string for node in soup.select(".score > .integer")]
    frac_parts = [node.string for node in soup.select(".score > .fraction")]
    # Drop the "主演:" prefix (3 chars) and the trailing newline of the cast line.
    casts = [text[text.find('主演') + 3:text.rfind('\n')] for text in raw_casts]
    # Drop the "上映时间:" prefix (5 chars) of the release-time line.
    release_times = [text[text.find('上映时间') + 5:] for text in raw_dates]
    # The score markup splits each rating into integer and fractional halves.
    scores = [whole + frac for whole, frac in zip(whole_parts, frac_parts)]
    return names, casts, release_times, scores
def write_to_file(names, stars, times, scores, path='result.txt'):
    """Append one formatted line per movie to *path*.

    The four lists are consumed in lockstep; each output row holds the
    movie name, cast, release time and score in fixed-width columns.
    *path* defaults to ``'result.txt'`` to match the original behavior.
    """
    # "with" guarantees the handle is closed even if a write raises
    # (the original open()/close() pair leaked the file on error).
    with open(path, 'a', encoding='utf-8') as fp:
        for name, star, release, score in zip(names, stars, times, scores):
            fp.write('%-15s %-20s %-17s %-3s' % (name, star, release, score))
            fp.write('\n')
def main(offset):
    """Scrape one page of the top-100 board (selected by *offset*) and persist it."""
    page_url = 'http://maoyan.com/board/4?offset=' + str(offset)
    page = get_html(page_url)
    names, casts, dates, scores = parse_html(page)
    write_to_file(names, casts, dates, scores)
if __name__ == '__main__':
    # The board paginates in steps of ten: offset = 0, 10, ..., 90.
    for offset in range(0, 100, 10):
        main(offset)
结果如下:
相关文章推荐
- python简单爬虫开发(urllib2、requests + BeautifulSoup)
- Python requests+gevent+BeautifulSoup lxml 干点啥-加点速
- python获取网页page数,同时按照href批量爬取网页(requests+BeautifulSoup)
- python eclipse 插件安装 及BeautifulSoup requests selenium在线安装 PhantomJS 安装 环境配置
- Python学习记录-爬取猫眼电影top100榜单
- python3 requests+BeautifulSoup使用多进程爬取妹子图
- Python + Requests + BeautifulSoup每日BUG汇总
- python+beautifulsoup爬取豆瓣电影TOP250
- Python 爬取 猫眼 top100 电影例子
- Python 爬虫—— requests BeautifulSoup
- 详解Python 采用 requests + Beautiful Soup 爬取房天下新楼盘推荐 推荐
- Python爬虫知识(1)——scrapy vs requests+BeautifulSoup
- python 爬虫项目-爬取猫眼top100电影
- 利用requests和正则爬取猫眼电影top100榜单
- [原创] Python3.6+request+beautiful 半次元Top100 爬虫实战,将小姐姐的cos美图获得
- 【Python爬虫】requests+Beautifulsoup存入数据库
- Python爬取猫眼top100排行榜数据【含多线程】
- Python爬虫入门之一-requests+BeautifulSoup
- Python爬虫-爬取猫眼电影Top100榜单
- 使用requests+beautifulsoup模块实现python网络爬虫功能