
Selenium 2 usage notes

2016-07-16 18:00
Install: pip install selenium

Web drivers

PhantomJS download: http://phantomjs.org/download.html

Browser driver downloads: http://www.seleniumhq.com/download

ChromeDriver: http://chromedriver.storage.googleapis.com/index.html?path=2.22/
#!/usr/bin/env python
# encoding: utf-8
from selenium import webdriver

# start a Chrome session (requires chromedriver on PATH)
driver = webdriver.Chrome()
url = 'http://www.toutiao.com/news_fashion/'

# load the page and print its <title>
driver.get(url)

print driver.title
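
PhantomJS, listed in the downloads above, can be swapped in when no visible browser window is wanted. A minimal sketch, assuming the phantomjs binary is on PATH (otherwise pass executable_path):

#!/usr/bin/env python
# encoding: utf-8
from selenium import webdriver

# headless browser: no window is opened
driver = webdriver.PhantomJS()
driver.get('http://www.toutiao.com/news_fashion/')
print driver.title
driver.quit()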


An example of crawling Toutiao (今日头条). The page is refreshed to change the article content; I do not yet know how to do this by simulating mouse scrolling (a scrolling sketch follows the code below).

#!/usr/bin/env python
# encoding: utf-8
import time
import itertools
from selenium import webdriver

driver = webdriver.Chrome()
url = 'http://www.toutiao.com/news_fashion/'
driver.get(url)
print driver.title

for x in range(2):
    # refresh to get a new batch of recommended articles
    driver.refresh()
    titles = driver.find_elements_by_class_name("title-box")
    contents = driver.find_elements_by_class_name("abstract")
    # note: only the first .feedimg element is found and reused for every item
    imgs = driver.find_element_by_css_selector(".feedimg")
    for title, content, img in zip(titles, contents, itertools.repeat(imgs)):
        data = {
            'title': title.text,
            'content': content.text,
            'img': img.get_attribute('src')
        }
        print data
    time.sleep(10)

driver.close()
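
As for the mouse scrolling mentioned above, the usual workaround is to run a small piece of JavaScript instead of simulating the mouse. This is a sketch of that idea rather than something from the original post, reusing the same page:

#!/usr/bin/env python
# encoding: utf-8
import time
from selenium import webdriver

driver = webdriver.Chrome()
driver.get('http://www.toutiao.com/news_fashion/')

# scroll to the bottom a few times so the feed can load more articles
for _ in range(3):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # give the page time to append new items

driver.quit()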


An example of automated login:

# coding: utf-8

from selenium import webdriver
import time

# the site shows a captcha, so a fully automated login may still be blocked
driver = webdriver.Chrome()
url = 'http://mp.sohu.com/'
driver.get(url)

# fill in the login form (replace username/password with real credentials)
driver.find_element_by_id("userid").clear()
driver.find_element_by_id("userid").send_keys("username")
driver.find_element_by_id("pwd").clear()
driver.find_element_by_id("pwd").send_keys("password")
driver.find_element_by_id("loginbutton").click()

time.sleep(2)
driver.close()
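
The fixed time.sleep(2) can usually be replaced by an explicit wait, which returns as soon as the element is actually there. A minimal sketch reusing the "userid" field from the example above:

# coding: utf-8
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get('http://mp.sohu.com/')

# wait up to 10 seconds for the login field to appear, then type into it
userid = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "userid"))
)
userid.send_keys("username")

driver.quit()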


Scrapy + Selenium + PhantomJS

# coding: utf-8
from scrapy.spiders import Spider
from scrapy.selector import Selector
from selenium import webdriver


class judge(Spider):
    name = "judge"
    start_urls = ["http://wenshu.court.gov.cn/List/List?sorttype=1&conditions=searchWord+2+AJLX++%E6%A1%88%E4%BB%B6%E7%B1%BB%E5%9E%8B:%E6%B0%91%E4%BA%8B%E6%A1%88%E4%BB%B6"]

    def init_driver(self):
        # webdriver.PhantomJS() would also work here for a headless run
        driver = webdriver.Chrome()
        return driver

    def parse(self, response):
        driver = self.init_driver()
        driver.get(self.start_urls[0])
        # the list page is rendered by JavaScript, so parse the browser's DOM
        sel = Selector(text=driver.page_source)
        self.logger.info(u'---------------Parsing----------------')
        print sel.xpath("//div[@class='dataItem'][1]/table/tbody/tr[1]/td/div[@class='wstitle']/a/text()").extract()
        self.logger.info(u'---------------success----------------')
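
Creating the driver inside parse() works, but a more common pattern is a Scrapy downloader middleware that renders each request with Selenium and hands the spider an HtmlResponse. A sketch of that pattern; the class name and settings path below are illustrative, not from the original post:

# coding: utf-8
from scrapy.http import HtmlResponse
from selenium import webdriver


class SeleniumMiddleware(object):
    """Render pages with a real browser before Scrapy parses them."""

    def __init__(self):
        self.driver = webdriver.Chrome()  # or webdriver.PhantomJS() for headless

    def process_request(self, request, spider):
        self.driver.get(request.url)
        # returning a Response here skips Scrapy's own downloader
        return HtmlResponse(
            self.driver.current_url,
            body=self.driver.page_source,
            encoding='utf-8',
            request=request,
        )

Enable it in settings.py with something like DOWNLOADER_MIDDLEWARES = {'myproject.middlewares.SeleniumMiddleware': 543}, where the module path depends on the project layout.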

