
[Python Web Scraping] Using Selenium + PhantomJS







Program 1: Enter a patient ID and query the report list

# encoding: utf-8

from selenium import webdriver
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from lxml import etree
import pandas as pd
import time
time1=time.time()
driver=webdriver.PhantomJS(executable_path='D:\\Program Files\\Python27\\Scripts\\phantomjs.exe')

xuhao0=[]
xuhao1=[]
xuhao2=[]
ideintity1=[]
name1=[]
sex1=[]
age1=[]
group1=[]
apply_name=[]
apply_time=[]
status=[]
apply_num=[]

def spider(number):
    try:
        url = "http://211.83.161.4:8000/XHlisWebReport.aspx"
        driver.get(url)
        # fill in the patient ID and submit the query form
        driver.find_element_by_id('txtoutpatient_id').send_keys(number)
        driver.find_element_by_id('btnConfirm').click()
        time.sleep(3)
        html = driver.page_source
        selector = etree.HTML(html)

        # td[2] -> "序号" (sequence number)
        num0 = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[2]/span/text()')
        for each in num0:
            print each
            xuhao0.append(each)

        # td[3] -> "检验单" (test sheet number)
        num1 = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[3]/text()')
        for each in num1:
            print each
            xuhao1.append(each)

        # td[4] -> "病员号" (patient ID)
        num2 = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[4]/text()')
        for each in num2:
            print each
            xuhao2.append(each)

        # td[6] -> "送检目的" (test purpose)
        ideintity = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[6]/text()')
        for each in ideintity:
            print each
            ideintity1.append(each)

        # td[7] -> "姓名" (name)
        name = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[7]/text()')
        for each in name:
            print each
            name1.append(each)

        # td[8] -> "性别" (sex)
        sex = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[8]/text()')
        for each in sex:
            print each
            sex1.append(each)

        # td[9] -> "年龄" (age)
        age = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[9]/text()')
        for each in age:
            print each
            age1.append(each)

        # td[12] -> "工作组" (work group)
        group = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[12]/text()')
        for each in group:
            print each
            group1.append(each)

        # td[13] -> "审核人员" (reviewer)
        apply_name1 = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[13]/text()')
        for each in apply_name1:
            print each
            apply_name.append(each)

        # td[14] -> "审核时间" (review time)
        apply_time1 = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[14]/text()')
        for each in apply_time1:
            print each
            apply_time.append(each)

        # td[15] -> "状态" (status)
        status1 = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[15]/text()')
        for each in status1:
            print each
            status.append(each)

        # td[16] -> "申请单号" (requisition number)
        apply_num1 = selector.xpath('//*[@id="GridView1"]/tbody/tr/td[16]/text()')
        for each in apply_num1:
            print each
            apply_num.append(each)

    except:
        # swallow errors (e.g. the page layout changes or no rows are returned)
        pass

if __name__ == '__main__':

    ##### Patient ID #####
    number = '0000201091'
    spider(number)

    data = pd.DataFrame({"序号": xuhao0, "检验单": xuhao1, "病员号": xuhao2, "送检目的": ideintity1,
                         "姓名": name1, "性别": sex1, "年龄": age1, "工作组": group1,
                         "审核人员": apply_name, "审核时间": apply_time, "状态": status, "申请单号": apply_num})

    print data
    # write the result table out to Excel
    writer = pd.ExcelWriter(r'C:\\XHlisWebReport.xlsx', engine='xlsxwriter', options={'strings_to_urls': False})
    data.to_excel(writer, index=False)
    writer.close()

    time2 = time.time()
    print u'ok,爬虫结束!'
    print u'总共耗时:' + str(time2 - time1) + 's'
    driver.close()
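
A note on the fixed time.sleep(3): it wastes time when the results come back quickly and can be too short when the server is slow. Below is a minimal sketch of an explicit wait, assuming the result table keeps the id GridView1 used in the XPath expressions above; the helper name wait_for_results and the 15-second timeout are illustrative, not part of the original script.

# a minimal sketch of replacing time.sleep(3) with an explicit wait
# (assumes the result table keeps the id "GridView1"; the 15 s timeout is arbitrary)
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

def wait_for_results(driver, timeout=15):
    # block until the GridView1 table is present in the DOM, or raise TimeoutException
    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.ID, 'GridView1'))
    )

Calling wait_for_results(driver) right after the btnConfirm click would then take the place of the fixed sleep.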


Program 2: Enter a requisition number and query the report details

# encoding: utf-8

from selenium import webdriver
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from lxml import etree
import pandas as pd
import time
time1=time.time()
driver=webdriver.PhantomJS(executable_path='D:\\Program Files\\Python27\\Scripts\\phantomjs.exe')

number1=[]
No=[]
test_project=[]
result=[]
host=[]
values=[]
phone=[]
status=[]

def spider(number):
    try:
        url = "http://211.83.161.4:8000/XHlisWebReport.aspx"
        driver.get(url)
        # fill in the requisition (barcode) number and submit the query form
        driver.find_element_by_id('txtrequisition_id').send_keys(number)
        driver.find_element_by_id('btnConfirm').click()
        time.sleep(3)
        # click the first result row so that the detail table (GridView2) is loaded
        driver.find_element_by_xpath('//*[@id="GridView1"]/tbody/tr[2]').click()
        html2 = driver.page_source
        selector = etree.HTML(html2)

        # td[1] -> "NO"
        No1 = selector.xpath('//*[@id="GridView2"]/tbody/tr/td[1]/text()')
        for each in No1:
            print each
            number1.append(number)
            No.append(each)

        # td[2] -> "检验项目" (test item)
        test_project1 = selector.xpath('//*[@id="GridView2"]/tbody/tr/td[2]/text()')
        for each in test_project1:
            print each
            test_project.append(each)

        # td[3] -> "结果" (result)
        result1 = selector.xpath('//*[@id="GridView2"]/tbody/tr/td[3]/text()')
        for each in result1:
            print each
            result.append(each)

        # td[4] -> "单位" (unit)
        host1 = selector.xpath('//*[@id="GridView2"]/tbody/tr/td[4]/text()')
        for each in host1:
            print each
            host.append(each)

        # td[5] -> "状态" (status)
        status1 = selector.xpath('//*[@id="GridView2"]/tbody/tr/td[5]/text()')
        for each in status1:
            print each
            status.append(each)

        # td[6] -> "参考值" (reference range)
        values1 = selector.xpath('//*[@id="GridView2"]/tbody/tr/td[6]/text()')
        for each in values1:
            print each
            values.append(each)

        # td[7] -> "代号" (code)
        phone1 = selector.xpath('//*[@id="GridView2"]/tbody/tr/td[7]/text()')
        for each in phone1:
            print each
            phone.append(each)

    except:
        # swallow errors (e.g. no matching report or the page layout changes)
        pass

if __name__ == '__main__':

    ######## Barcode number ########
    number = '1166372801'
    spider(number)

    data = pd.DataFrame({"条码号": number1, "NO": No, "检验项目": test_project, "结果": result,
                         "单位": host, "参考值": values, "代号": phone, "状态": status})
    print data
    # write the detail table out to Excel
    writer = pd.ExcelWriter(r'C:\\Reportdetail.xlsx', engine='xlsxwriter', options={'strings_to_urls': False})
    data.to_excel(writer, index=False)
    writer.close()

    time2 = time.time()
    print u'ok,爬虫结束!'
    print u'总共耗时:' + str(time2 - time1) + 's'
    driver.close()
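
Program 1 already collects every requisition number ("申请单号") into apply_num, so the two scripts could be chained: query the report list for a patient, then fetch the details of each report. A rough sketch, assuming the requisition number is the same value Program 2 expects in txtrequisition_id, and that the two scripts are saved as the hypothetical modules report_list.py and report_detail.py (neither assumption is stated in the original post):

# rough sketch of chaining the two programs; the module names report_list and
# report_detail are hypothetical -- each original script is standalone
import report_list       # Program 1: spider(patient_id) fills apply_num with requisition numbers
import report_detail     # Program 2: spider(requisition_no) scrapes one report's detail rows

report_list.spider('0000201091')              # query the report list for one patient
for requisition_no in report_list.apply_num:  # requisition numbers collected by Program 1
    report_detail.spider(requisition_no)      # fetch the detail table for each report

Note that each script starts its own PhantomJS instance at module level, so a combined version would probably want to share a single driver instead.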