Python Selenium 与 PhantomJS 运用:抓取随滚动条滚动加载的页面、JS 动作操作、模拟登录
2016-07-06 19:34
477 查看
1、运用
#!/usr/bin/python
#encoding=utf-8
# Load a JavaScript-rendered page in PhantomJS, print the rendered HTML and
# save a copy to aaaa1.txt.
import sys

from selenium import webdriver

# Python 2 only: make UTF-8 the implicit str<->unicode codec so that printing
# non-ASCII page text uses UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding("utf-8")

driver = webdriver.PhantomJS(
    executable_path='/home/lhy/phantomjs-1.9.8-linux-x86_64/bin/phantomjs')
try:
    driver.get("http://item.jd.com/2914823.html")
    # Example of filling in and submitting a form before reading the page:
    #driver.find_element_by_id('search_form_input_homepage').send_keys("Nirvana")
    #driver.find_element_by_id("search_button_homepage").click()

    # Read the rendered DOM once so the printed and the saved copy match.
    page_source = driver.page_source
finally:
    # Always shut the PhantomJS process down, even if loading failed.
    driver.quit()

print(page_source)

# The file is opened in binary mode, so encode explicitly rather than
# relying on the default-encoding hack above.
with open("aaaa1.txt", "wb") as fo:
    fo.write(page_source.encode("utf-8"))
2、抓取下拉加载的页面
#coding=utf-8
# Fetch pages either with plain HTTP (requests) or with PhantomJS, scrolling
# the page first so that scroll-triggered content is present in the HTML.
import re
import sys
import time

import requests
from bs4 import BeautifulSoup
from lxml import etree
from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Python 2 only: make UTF-8 the implicit str<->unicode codec.
reload(sys)
sys.setdefaultencoding("utf-8")

# User-Agent string sent by both fetchers.
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.5; windows NT)'

# URLs collected by getUrlsFromFile().
urls = []


def getHtml2(url):
    """POST to *url* with requests, save the body and return it.

    The raw response body is written to ``phonesinfo1.txt`` and returned
    as bytes.
    """
    response = requests.post(url, headers={'User-Agent': USER_AGENT})
    content = response.content
    with open("phonesinfo1.txt", "wb") as fo:
        fo.write(content)
    return content


def getHtml(url, scroll_top=1000):
    """Load *url* in PhantomJS, scroll down, and return the rendered HTML.

    url        -- page to load.
    scroll_top -- vertical scroll offset in pixels applied after the page
                  loads (default 1000, the value previously hard-coded).

    The PhantomJS process is always terminated, even if loading or
    scrolling raises.
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = USER_AGENT
    driver = webdriver.PhantomJS(desired_capabilities=dcap)
    # Alternatives:
    #driver = webdriver.PhantomJS(executable_path='/home/lhy/phantomjs-1.9.8-linux-x86_64/bin/phantomjs')
    #driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Scroll the page down so scroll-triggered handlers run.
        driver.execute_script("document.body.scrollTop=%d" % scroll_top)
        # NOTE: implicitly_wait() only sets the timeout used by later
        # find_element* lookups; it does not pause here. If the scrolled-in
        # content loads asynchronously, add e.g. time.sleep(5) or an
        # explicit wait before reading page_source.
        driver.implicitly_wait(30)
        return driver.page_source
    finally:
        driver.quit()


def getPqHtml(html):
    """Wrap *html* in a PyQuery object for jQuery-style querying."""
    return pq(html)


def getUrlsFromFile(fileUrl='phoneurl.txt'):
    """Append every line of the file *fileUrl* (stripped) to ``urls``.

    Each URL is printed as it is read. The parameter used to be ignored in
    favour of a hard-coded 'phoneurl.txt'; that name is now only the default.
    """
    with open(fileUrl, 'r') as f:
        for line in f:
            url_one = line.strip()
            print(url_one)
            urls.append(url_one)


url = "http://localhost:8080/pro/html.html"
text = getHtml(url)
# Binary mode: encode explicitly instead of relying on the default codec.
with open("taobao2.txt", "wb") as fo:
    fo.write(text.encode("utf-8"))
print(text)
HTML 页面(上面脚本抓取的 html.html)
<html>
<head>
</head>
<!-- The tall body makes the page scrollable. -->
<body style="height:5000px">
<!-- Hidden until the page has been scrolled down by at least 300px. -->
<div id="top_div" style="display:none">ffffffffffffffffffffff</div>
<script>
//document.body.scrollTop=10000;
// Reveal #top_div once the vertical scroll offset reaches 300px.
window.onscroll = function () {
    // Read the offset from whichever element the browser scrolls.
    var t = document.documentElement.scrollTop || document.body.scrollTop;
    var top_div = document.getElementById("top_div");
    if (t >= 300) {
        // alert(t);
        top_div.style.display = "block";
    }
    // else { top_div.style.display = "none"; }
};
</script>
</body>
</html>
3、模拟登录
# coding=utf-8
# Log in through a form in Firefox, then open a page that requires the
# authenticated session.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Firefox()
browser.get("http://localhost:8080/pro")

# Clear the password box first, then fill in the credentials.
password_box = browser.find_element(By.NAME, "password")
password_box.clear()
browser.find_element(By.NAME, "username").send_keys("test")
password_box.send_keys("123")

# Read the verification code text shown on the page and strip its spaces.
yzm = browser.find_element(By.CLASS_NAME, "yzm-img").find_element(By.TAG_NAME, "span").text
yzm = yzm.replace(' ', '')
browser.find_element(By.CLASS_NAME, "yzm-sr").send_keys(yzm)

# Click the button to submit the form.
browser.find_element(By.ID, "tijiao").click()
print(browser.current_url)

# After a successful login the cookies are kept in the browser object, so
# pages that require authentication can be requested directly.
browser.get("http://localhost:8080/pro/test.jsp")
print(browser.page_source)

# The browser is left open; uncomment to close it when finished.
#browser.quit()
4、百度搜索
# coding=utf-8
# Run a Baidu search for "selenium" in Firefox and print the resulting URL.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Firefox()
browser.get("http://www.baidu.com")

# Look the search box up once, empty it, and type the query.
search_box = browser.find_element(By.ID, "kw")
search_box.clear()
search_box.send_keys("selenium")

# Click the search button.
browser.find_element(By.ID, "su").click()

# URL of the page reached after the click.
print(browser.current_url)

# The browser is left open; uncomment to close it when finished.
#browser.quit()
相关文章推荐
- Python作业第一课
- 文成小盆友python-num9 socket编程
- VSCode的MagicPython插件
- 使用PyCharm配置Spark的Python开发环境(基础)
- 使用uwsgi 部署python web服务
- Python入门:验证码破解(二)
- 『python学习』正则表达式学习
- python并发获取snmp信息及性能测试
- 用Python和OpenCV创建一个图片搜索引擎的完整指南
- python实现根据两点经纬度计算实际距离
- Python学习之路(Win7)之Python核心编程
- python2.0_s12_day12_html介绍
- 在windows7(64位)下安装python(3.4)的theano库
- python for循环remove同一个list
- python for循环remove同一个list 推荐
- python冒泡排序和Range用法
- Python学习笔记 —— mysql数据库使用
- python特有的输出格式
- 关于Python爬虫程序scrapy的安装问题
- 决策树--ID3