[置顶] 【python 爬虫】selenium+phantomjs 用法
2017-06-15 09:41
507 查看
程序1:输入病员号,查询报告列表信息
# encoding: utf-8 from selenium import webdriver import sys reload(sys) sys.setdefaultencoding('utf-8') from lxml import etree import pandas as pd import time time1=time.time() driver=webdriver.PhantomJS(executable_path='D:\\Program Files\\Python27\\Scripts\\phantomjs.exe') xuhao0=[] xuhao1=[] xuhao2=[] ideintity1=[] name1=[] sex1=[] age1=[] group1=[] apply_name=[] apply_time=[] status=[] apply_num=[] def spider(number): try: url = "http://211.83.161.4:8000/XHlisWebReport.aspx" html=driver.get(url) driver.find_element_by_id('txtoutpatient_id').send_keys(number) driver.find_element_by_id('btnConfirm').click() time.sleep(3) html=driver.page_source selector=etree.HTML(html) num0=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[2]/span/text()') for each in num0: print each xuhao0.append(each) num1=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[3]/text()') for each in num1: print each xuhao1.append(each) num2=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[4]/text()') for each in num2: print each xuhao2.append(each) ideintity=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[6]/text()') for each in ideintity: print each ideintity1.append(each) name=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[7]/text()') for each in name: print each name1.append(each) sex=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[8]/text()') for each in sex: print each sex1.append(each) age=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[9]/text()') for each in age: print each age1.append(each) group=selector.xpath('//*[@id="GridView1"]/tbody/tr/td[12]/text()') for each in group: print each group1.append(each) apply_name1= selector.xpath('//*[@id="GridView1"]/tbody/tr/td[13]/text()') for each in apply_name1: print each apply_name.append(each) apply_time1= selector.xpath('//*[@id="GridView1"]/tbody/tr/td[14]/text()') for each in apply_time1: print each apply_time.append(each) status1= selector.xpath('//*[@id="GridView1"]/tbody/tr/td[15]/text()') for each in status1: print each 
status.append(each) apply_num1= selector.xpath('//*[@id="GridView1"]/tbody/tr/td[16]/text()') for each in apply_num1: print each apply_num.append(each) except: pass if __name__ == '__main__': #####病员号 number = '0000201091' spider(number) data=pd.DataFrame({"序号":xuhao0,"检验单":xuhao1,"病员号":xuhao2,"送检目的":ideintity1,"姓名":name1,"性别":sex1,"年龄":age1,\ "工作组":group1,"审核人员":apply_name,"审核时间":apply_time,"状态":status,"申请单号":apply_num}) print data # 写出excel writer = pd.ExcelWriter(r'C:\\XHlisWebReport.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) data.to_excel(writer, index=False) writer.close() time2 = time.time() print u'ok,爬虫结束!' print u'总共耗时:' + str(time2 - time1) + 's' driver.close()
程序2:输入申请号,查询报告详情
# encoding: utf-8 from selenium import webdriver import sys reload(sys) sys.setdefaultencoding('utf-8') from lxml import etree import pandas as pd import time time1=time.time() driver=webdriver.PhantomJS(executable_path='D:\\Program Files\\Python27\\Scripts\\phantomjs.exe') number1=[] No=[] test_project=[] result=[] host=[] values=[] phone=[] status=[] def spider(number): try: url="http://211.83.161.4:8000/XHlisWebReport.aspx" driver.get(url) driver.find_element_by_id('txtrequisition_id').send_keys(number) driver.find_element_by_id('btnConfirm').click() time.sleep(3) driver.find_element_by_xpath('//*[@id="GridView1"]/tbody/tr[2]').click() html2=driver.page_source selector=etree.HTML(html2) No1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[1]/text()') for each in No1: print each number1.append(number) No.append(each) test_project1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[2]/text()') for each in test_project1: print each test_project.append(each) result1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[3]/text()') for each in result1: print each result.append(each) host1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[4]/text()') for each in host1: print each host.append(each) status1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[5]/text()') for each in status1: print each status.append(each) values1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[6]/text()') for each in values1: print each values.append(each) phone1=selector.xpath('//*[@id="GridView2"]/tbody/tr/td[7]/text()') for each in phone1: print each phone.append(each) except: pass if __name__ == '__main__': ########条码号################ number = '1166372801' spider(number) data = pd.DataFrame({"条码号":number1,"NO":No,"检验项目":test_project,"结果":result,"单位":host,"参考值":values,"代号":phone,"状态":status}) print data # 写出excel writer = pd.ExcelWriter(r'C:\\Reportdetail.xlsx', engine='xlsxwriter', options={'strings_to_urls': False}) data.to_excel(writer, index=False) writer.close() time2 = time.time() print 
u'ok,爬虫结束!' print u'总共耗时:' + str(time2 - time1) + 's' driver.close()
相关文章推荐
- Python爬虫利器五之Selenium的用法
- python爬虫 使用selenium+phontomjs 模拟点击输入 获取东航加载后的源码 机票价格
- Python爬虫利器五之Selenium的用法
- [置顶] 【python 爬虫】selenium爬虫模块 phantomjs如何加代理IP
- python 针对selenium+phontomjs等模拟浏览器爬虫的反爬技术点
- Python爬虫利器之Selenium的用法
- Python爬虫利器五之Selenium的用法
- 运维学python之爬虫工具篇(五)Selenium的用法
- [置顶] 【python 爬虫】python淘宝爬虫实战(selenum+phontomjs)
- Python爬虫利器五之Selenium的用法
- Python爬虫利器五之Selenium的用法
- python selenium+phontomjs的详细用法及简单案例
- selenium_webdriver(python)键盘组合键用法,输入中文问题
- [转载]Python爬虫入门四之Urllib库的高级用法
- Python爬虫入门之Beautiful Soup的用法
- Python爬虫入门-Beautiful Soup的用法
- [python爬虫] Selenium定向爬取海量精美图片及搜索引擎杂谈
- [Python爬虫] Selenium自动访问Firefox和Chrome并实现搜索截图
- Python爬虫入门四之Urllib库的高级用法
- Python爬虫教程——入门四之Urllib库的高级用法