python例子-PyQuery抓取信息.
2015-10-20 12:07
555 查看
#!/usr/bin/python
#coding:utf-8
"""Scrape a single stylebop.com product page with PyQuery.

Prints the product's large images, brand, price, available sizes and
product name, in that order.
"""
from __future__ import print_function

import re

from pyquery import PyQuery

# Product page that is scraped when main() is called without arguments.
PRODUCT_URL = 'http://www.stylebop.com/cn/product_details.php?id=606526&special=sale'

# Matches an href that carries several single-quoted values: group 1 is the
# first quoted value ending in "jpg"; group 2 is a later quoted value ending
# in "jpg" (after four more quote characters), which is used as the large image.
IMG_HREF_PATTERN = re.compile(r".*?'(.*?jpg)'.*?'.*?'.*?'.*?'.*?'(.*?jpg)'.*?")


def _large_image_url(href):
    """Return the absolute large-image URL embedded in *href*, or None.

    None is returned when *href* is missing/empty or does not match
    IMG_HREF_PATTERN, so that one malformed entry does not abort the scrape.
    """
    if not href:
        return None
    match = IMG_HREF_PATTERN.search(href)
    if match is None:
        return None
    img_large = match.group(2)
    # Site-relative paths are made absolute.
    if not img_large.startswith('http'):
        img_large = 'http://www.stylebop.com%s' % img_large
    return img_large


def _collect_images(pqhtml):
    """Return the large-image URLs of all ``li.image_click_rotator`` entries."""
    img_list = []
    for li in pqhtml('li').filter('.image_click_rotator'):
        # The link is read from li > first child > first child.
        try:
            anchor = li.getchildren()[0].getchildren()[0]
        except IndexError:
            # Entry without the expected nesting: skip it.
            continue
        img_large = _large_image_url(anchor.get('href'))
        if img_large is not None:
            img_list.append(img_large)
    return img_list


def _read_prices(price_div):
    """Return ``(old_price, new_price)`` read from the price container.

    When the first span has the ``old_price`` class, the old price is that
    span's text and the new price joins the texts of spans 1 and 3 with
    ' / '. Otherwise the old price is '' and the new price joins the whole
    container text with the first span's text.
    """
    first_span = price_div('span:first')
    if first_span.hasClass('old_price'):
        # .text is a method on PyQuery objects and must be called.
        old_price = first_span.text()
        new_price = (price_div('span:eq(1)').text() + ' / '
                     + price_div('span:eq(3)').text())
    else:
        old_price = ''
        new_price = price_div.text() + ' / ' + first_span.text()
    return old_price, new_price


def main(url=PRODUCT_URL):
    """Fetch *url* and print the product's images, brand, price, sizes, name.

    Args:
        url: Product page to scrape; defaults to PRODUCT_URL.
    """
    pqhtml = PyQuery(url=url)

    # Product images (large versions).
    print('产品图片:', _collect_images(pqhtml))

    # Brand.
    brand = pqhtml('div').filter('.productInfo')('a:first').text()
    print('品牌:%s' % brand)

    # Price: the container is looked up by its id.
    price_div = pqhtml('div').filter('#product_price')
    # The old price is extracted but, as before, only the new price is printed.
    _old_price, new_price = _read_prices(price_div)
    # NOTE: the original author observed that formatting the price into the
    # label with '%s' raised "'ascii' codec can't encode character u'\u20ac'",
    # so the label and the value are passed to print as separate arguments.
    print('价格:', new_price)

    # Sizes: iterating yields lxml HTMLElement objects, whose .text is an
    # attribute (not a method).
    size_list = [option.text
                 for option in pqhtml('select').filter('.newInput2')('option')]
    print('size:', size_list)

    # Product name.
    pname = pqhtml('div').filter('.productInfo')('span:first').text()
    print('产品名:%s' % pname)


if __name__ == '__main__':
    main()
相关文章推荐
- Python filter map练习
- Python点滴01——关于Python2.x和3.x的一些个人感受
- python Timestamp、Datetime、UTC时间之间转换练习
- Python中实现对Timestamp和Datetime及UTC时间之间的转换
- 简单功能的正则表达式引擎实现
- ubuntu14.01 下python3.4 链接mysql数据库
- python 面向对象(进阶篇)
- Python+opencv人脸识别
- Python时间,日期,时间戳之间转换
- python下sched实现延时或者循环的用法
- selenium webdriver (python)第三版.pdf
- python异常处理
- python操作二进制文件
- 关于Python的3张图
- python之len函数
- Windows下安装python 包管理器pip
- python实现汉诺塔
- 笨办法学python的笔记之七(ex37 复习各种符号)
- 笨办法学python的笔记之六(ex36 设计和调试)
- python(1) python基础