Python批量查询网站名称
2016-05-30 10:04
1051 查看
前言:最近一段时间老大给我一大批网站域名,让我帮忙查询这些网站域名对应的网站名称。作为一个IT屌丝,这种事情不可能手动一个一个去查询,于是决定写个自动化脚本去查询。
思路:网上有很多域名查询的网站,但是大部分都需要输入验证码,这样编写脚本的难度就增加了很多。千辛万苦才找到了一个可用的网站:http://icp.chinaz.com/ 。于是就开始编写脚本,由于近段时间在学习Selenium,所以就用它的框架去写。
功能:读取txt文件中的域名,然后查询域名对应的网站名称,查询结束后把结果写入excel表格里。
废话不多说,直接上代码:
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
import xlwt
class Myurltest(unittest.TestCase):
    """Batch-query icp.chinaz.com for the site name of each listed domain.

    Domains are read from ``newurl.txt`` (one per line) and the results are
    written to the first sheet of ``myurl.xls``: column 0 holds the domain,
    column 1 the text found on the result page (or a "not found" marker).
    """

    def setUp(self):
        """Start Firefox and initialise the state shared by the helpers."""
        self.driver = webdriver.Firefox()
        # Generous wait for the first page load; test_myurl shortens it
        # once the query loop is running.
        self.driver.implicitly_wait(30)
        self.base_url = "http://icp.chinaz.com/"
        self.verificationErrors = []
        self.accept_next_alert = True

    def test_myurl(self):
        """Query every domain in ``newurl.txt`` and save the answers.

        The workbook is saved in a ``finally`` clause so that results
        collected before an unexpected browser error are not lost.
        """
        driver = self.driver
        # base_url already ends with "/", so it is used as-is.
        driver.get(self.base_url)
        driver.maximize_window()

        # Read-only access is sufficient, and the context manager closes the
        # file. Line endings are stripped so they are neither typed into the
        # search box nor stored in the sheet; blank lines are skipped.
        with open("newurl.txt", "r") as url_file:
            domains = [entry.strip() for entry in url_file]
        domains = [domain for domain in domains if domain]

        book = xlwt.Workbook(encoding='utf-8', style_compression=0)
        sheet = book.add_sheet(u'data', cell_overwrite_ok=True)
        try:
            for row, domain in enumerate(domains):
                search_box = driver.find_element(By.ID, "s")
                search_box.clear()
                search_box.send_keys(domain)
                driver.find_element(By.ID, "search").click()
                sheet.write(row, 0, domain)

                # Fail fast when the result element is absent instead of
                # waiting the full 30 seconds configured in setUp.
                driver.implicitly_wait(0.5)
                try:
                    # Presumably the site-name field of the result panel;
                    # verify against the live page layout.
                    data = driver.find_element(
                        By.XPATH, "//div/ul[@id='first']/li[4]/p").text
                except NoSuchElementException:
                    sheet.write(row, 1, u"没查到")
                    print(u"没查到")
                    continue

                print(domain)
                print(data)
                sheet.write(row, 1, data)
        finally:
            book.save('myurl.xls')

    def is_element_present(self, how, what):
        """Return True if an element matching locator (how, what) exists."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True if a JavaScript alert is currently open."""
        try:
            # NOTE(review): reading .text forces a round-trip to the browser;
            # some Selenium versions appear to build the alert handle lazily
            # and would otherwise never raise here - confirm for the
            # installed version.
            self.driver.switch_to.alert.text
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Accept or dismiss the open alert and return its text.

        The alert is accepted when ``self.accept_next_alert`` is true and
        dismissed otherwise; the flag is reset to True afterwards, even if
        handling the alert raised.
        """
        try:
            alert = self.driver.switch_to.alert
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Close the browser and fail if verification errors were recorded."""
        self.driver.quit()
        self.assertEqual([], self.verificationErrors)
# Run the test case only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
思路:网上有很多域名查询的网站,但是大部分都需要输入验证码,这样编写脚本的难度就增加了很多。千辛万苦才找到了一个可用的网站:http://icp.chinaz.com/ 。于是就开始编写脚本,由于近段时间在学习Selenium,所以就用它的框架去写。
功能:读取txt文件中的域名,然后查询域名对应的网站名称,查询结束后把结果写入excel表格里。
废话不多说,直接上代码:
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
import xlwt
class Myurltest(unittest.TestCase):
    """Batch-query icp.chinaz.com for the site name of each listed domain.

    Domains are read from ``newurl.txt`` (one per line) and the results are
    written to the first sheet of ``myurl.xls``: column 0 holds the domain,
    column 1 the text found on the result page (or a "not found" marker).
    """

    def setUp(self):
        """Start Firefox and initialise the state shared by the helpers."""
        self.driver = webdriver.Firefox()
        # Generous wait for the first page load; test_myurl shortens it
        # once the query loop is running.
        self.driver.implicitly_wait(30)
        self.base_url = "http://icp.chinaz.com/"
        self.verificationErrors = []
        self.accept_next_alert = True

    def test_myurl(self):
        """Query every domain in ``newurl.txt`` and save the answers.

        The workbook is saved in a ``finally`` clause so that results
        collected before an unexpected browser error are not lost.
        """
        driver = self.driver
        # base_url already ends with "/", so it is used as-is.
        driver.get(self.base_url)
        driver.maximize_window()

        # Read-only access is sufficient, and the context manager closes the
        # file. Line endings are stripped so they are neither typed into the
        # search box nor stored in the sheet; blank lines are skipped.
        with open("newurl.txt", "r") as url_file:
            domains = [entry.strip() for entry in url_file]
        domains = [domain for domain in domains if domain]

        book = xlwt.Workbook(encoding='utf-8', style_compression=0)
        sheet = book.add_sheet(u'data', cell_overwrite_ok=True)
        try:
            for row, domain in enumerate(domains):
                search_box = driver.find_element(By.ID, "s")
                search_box.clear()
                search_box.send_keys(domain)
                driver.find_element(By.ID, "search").click()
                sheet.write(row, 0, domain)

                # Fail fast when the result element is absent instead of
                # waiting the full 30 seconds configured in setUp.
                driver.implicitly_wait(0.5)
                try:
                    # Presumably the site-name field of the result panel;
                    # verify against the live page layout.
                    data = driver.find_element(
                        By.XPATH, "//div/ul[@id='first']/li[4]/p").text
                except NoSuchElementException:
                    sheet.write(row, 1, u"没查到")
                    print(u"没查到")
                    continue

                print(domain)
                print(data)
                sheet.write(row, 1, data)
        finally:
            book.save('myurl.xls')

    def is_element_present(self, how, what):
        """Return True if an element matching locator (how, what) exists."""
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True if a JavaScript alert is currently open."""
        try:
            # NOTE(review): reading .text forces a round-trip to the browser;
            # some Selenium versions appear to build the alert handle lazily
            # and would otherwise never raise here - confirm for the
            # installed version.
            self.driver.switch_to.alert.text
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Accept or dismiss the open alert and return its text.

        The alert is accepted when ``self.accept_next_alert`` is true and
        dismissed otherwise; the flag is reset to True afterwards, even if
        handling the alert raised.
        """
        try:
            alert = self.driver.switch_to.alert
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Close the browser and fail if verification errors were recorded."""
        self.driver.quit()
        self.assertEqual([], self.verificationErrors)
# Run the test case only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
相关文章推荐
- Python动态类型的学习---引用的理解
- Python3写爬虫(四)多线程实现数据爬取
- 垃圾邮件过滤器 python简单实现
- 下载并遍历 names.txt 文件,输出长度最长的回文人名。
- install and upgrade scrapy
- Scrapy的架构介绍
- Centos6 编译安装Python
- 使用Python生成Excel格式的图片
- 让Python文件也可以当bat文件运行
- [Python]推算数独
- Python中zip()函数用法举例
- Python中map()函数浅析
- Python将excel导入到mysql中
- Python在CAM软件Genesis2000中的应用
- 使用Shiboken为C++和Qt库创建Python绑定
- FREEBASIC 编译可被python调用的dll函数示例
- Python 七步捉虫法