您的位置:首页 > 编程语言 > Python开发

Python 例子:使用 PyQuery 抓取信息

2015-10-20 12:07 555 查看
#!/usr/bin/python
#coding:utf-8

from pyquery import PyQuery
import re

# 抓取:http://www.stylebop.com/cn/product_details.php?id=606526&special=sale
# 获得   产品名 品牌 价格 size  图片(大图)
_PRODUCT_URL = 'http://www.stylebop.com/cn/product_details.php?id=606526&special=sale'
_SITE_ROOT = 'http://www.stylebop.com'

# Compiled once at import time instead of on every call.  The pattern captures
# two quoted "...jpg" paths from an anchor's href; the second capture is the
# one this script reports as the large image.
_LARGE_IMAGE_RE = re.compile(".*?'(.*?jpg)'.*?'.*?'.*?'.*?'.*?'(.*?jpg)'.*?")


def _large_image_urls(page):
    """Return the absolute large-image URLs found in the page's image list.

    Looks at every ``li.image_click_rotator`` element, reads the ``href`` of
    its first grandchild (li > first child > first child) and extracts the
    second ``.jpg`` path captured by ``_LARGE_IMAGE_RE``.  Items whose markup
    or href does not have the expected shape are skipped instead of raising.
    """
    urls = []
    for li in page('li').filter('.image_click_rotator'):
        try:
            href = li[0][0].get('href')
        except IndexError:
            # The nested element holding the link is missing.
            continue
        # ``href`` may be None when the attribute is absent.
        match = _LARGE_IMAGE_RE.search(href or '')
        if match is None:
            continue
        large = match.group(2)
        if not large.startswith('http'):
            # Site-relative path: prefix it with the site root.
            large = _SITE_ROOT + large
        urls.append(large)
    return urls


def _price_text(page):
    """Return the price text shown in the ``#product_price`` div.

    When the first span carries the ``old_price`` class, the text of the spans
    at index 1 and 3 is joined with ' / '; otherwise the whole div text is
    joined with the text of the first span.
    """
    price_div = page('div').filter('#product_price')
    first_span = price_div('span:first')
    if first_span.hasClass('old_price'):
        return price_div('span:eq(1)').text() + ' / ' + price_div('span:eq(3)').text()
    # ``text`` is a method on PyQuery objects and has to be called.
    return price_div.text() + ' / ' + first_span.text()


def main(url=_PRODUCT_URL):
    """Fetch a product page and print its images, brand, price, sizes and name.

    Args:
        url: Address of the product-details page to scrape.  Defaults to the
            sample product this script was written against.

    The labels are unicode literals passed to ``print`` as a single argument:
    that is valid syntax on both Python 2 and Python 3, and it avoids mixing a
    byte-string format with unicode page text (the source of the 'ascii' codec
    error when the price contains a euro sign).
    """
    page = PyQuery(url=url)
    product_info = page('div').filter('.productInfo')

    # Product images (large versions).
    print(u'产品图片: %s' % (_large_image_urls(page),))

    # Brand: first link inside the product-info div.
    print(u'品牌:%s' % product_info('a:first').text())

    # Price.
    print(u'价格: %s' % _price_text(page))

    # Sizes: iterating a PyQuery selection yields lxml elements, on which
    # ``text`` is a plain attribute rather than a method.
    sizes = [option.text for option in page('select').filter('.newInput2')('option')]
    print(u'size: %s' % (sizes,))

    # Product name: first span inside the product-info div.
    print(u'产品名:%s' % product_info('span:first').text())

if __name__ == '__main__':
    # Run the scraper only when executed as a script, not when imported.
    main()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: