您的位置:首页 > 编程语言 > Python开发

Python提交表单并处理返回结果

2014-12-29 23:06 543 查看
1、将本地文件内容批量提交到指定网址,并将结果保存到本地

import urllib 

import urllib2

import cookielib

# Batch-submit every record of a local sequence file to a web form and save
# each HTML response to D://<record name>.html.
#
# The input file is consumed as alternating lines: a header line (starting
# with '>') followed by one sequence line.

# Cookie-aware opener, installed globally and also used directly below.
cj = cookielib.LWPCookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)

# NOTE: this is the form's "action" target, not the URL of the page that
# displays the form.
url = 'http://scansite.mit.edu/cgi-bin/motifscan_seq'

# `with` guarantees the input file is closed even if a request fails midway.
with open("D://test.final.pdb.fas") as fasta:
    # iter(readline, '') yields header lines until EOF; the sequence line is
    # pulled explicitly so the two reads stay paired.
    for linen in iter(fasta.readline, ''):
        lines = fasta.readline()

        # Form fields, keyed by the <input name="..."> attributes of the form.
        # The header and sequence lines are sent exactly as read.
        values = {
            'protein_id': linen,
            'sequence': lines,
            'motif_option': 'all',
            'stringency': 'High',
        }
        data = urllib.urlencode(values)
        # Supplying a data payload makes urllib2 issue a POST.
        req = urllib2.Request(url, data)

        response = opener.open(req)
        try:
            fd = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()

        # Derive the output file name from the header: drop the trailing
        # newline, then the '>' marker.
        linen = linen.strip('\n')
        linen = linen.strip('>')

        # Binary mode stores the response bytes unmodified; text mode on
        # Windows would rewrite the line endings.
        with open('D://' + linen + '.html', 'wb') as file_object:
            file_object.write(fd)

2、提取网页信息并进行处理

import re

# For every record name listed in D://name.txt, scan the saved page
# D://<name>.html and append one row to D://1.txt:
#     <name>\t<site>\t<score>\t<site>\t<score>...
#
# Compiled once, outside the loops. The dot in the score is escaped so only a
# literal decimal point is accepted (an unescaped '.' matches any character).
site_score = re.compile(
    r'site=(?P<site>[a-zA-Z]\d+)&position=\d+&score=(?P<score>\d+\.\d+)')

# `with` closes every handle even if one of the HTML files is missing.
with open("D://name.txt") as names, open('D://1.txt', 'w') as file_object:
    for line in names:
        line = line.strip('\n')
        if not line:
            # A blank line names no record; skip it instead of trying to
            # open "D://.html".
            continue

        file_object.write(line)
        print(line)

        with open("D://" + line + ".html") as filehtml:
            for linetxt in filehtml:
                # search() reports at most one hit per line; use
                # finditer()/findall() on the whole text to collect several.
                foundresult = site_score.search(linetxt)
                if foundresult:
                    site = foundresult.group("site")
                    score = foundresult.group("score")
                    print(site + score)
                    file_object.write('\t' + site + '\t' + score)

        # Terminate this record's row.
        file_object.write('\n')

3、补充

(1)读取文件

file = open("D://test.final.pdb.fas")

line = file.readline()

while line:

    print line,

    line = file.readline()

    

file.close()

(2)读取数据集中的每项数据的名称

# Collect the name of every record in D://test.final.pdb.fas into
# D://name.txt, one per line.
#
# The input is consumed as alternating header/sequence lines; only header
# lines are kept, with the '>' marker stripped.
with open("D://test.final.pdb.fas") as fasta, \
        open('D://name.txt', 'w') as file_object:
    # iter(readline, '') yields header lines until EOF.
    for line in iter(fasta.readline, ''):
        # The header keeps its trailing newline, which separates the names
        # in the output file.
        file_object.write(line.strip('>'))
        # Discard the sequence line that follows each header.
        fasta.readline()

(3)re.findall()的处理

import urllib 

import urllib2

import cookielib

import re

# Submit every record of a local sequence file to a web form and, instead of
# saving the pages, extract all site/score pairs from each response straight
# into D://1.txt as one row per record:
#     <name>\t<site>\t<score>\t<site>\t<score>...
#
# The input file is consumed as alternating lines: a header line (starting
# with '>') followed by one sequence line.

# Cookie-aware opener, installed globally and also used directly below.
cj = cookielib.LWPCookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)

# The form's "action" target.
url = 'http://scansite.mit.edu/cgi-bin/motifscan_seq'

# Compiled once, outside the loop. The dot in the score is escaped so only a
# literal decimal point is accepted (an unescaped '.' matches any character).
site_score = re.compile(
    r'site=(?P<site>[a-zA-Z]\d+)&position=\d+&score=(?P<score>\d+\.\d+)')

# `with` closes both files even if a request fails midway.
with open("D://test.final.pdb.fas") as fasta, \
        open('D://1.txt', 'w') as file_object:
    # iter(readline, '') yields header lines until EOF; the sequence line is
    # pulled explicitly so the two reads stay paired.
    for linen in iter(fasta.readline, ''):
        lines = fasta.readline()

        # The header and sequence lines are sent exactly as read.
        values = {
            'protein_id': linen,
            'sequence': lines,
            'motif_option': 'all',
            'stringency': 'High',
        }
        data = urllib.urlencode(values)
        # Supplying a data payload makes urllib2 issue a POST.
        req = urllib2.Request(url, data)

        response = opener.open(req)
        try:
            fd = response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()

        # Record name: header without its trailing newline and '>' marker.
        linen = linen.strip('\n')
        linen = linen.strip('>')
        file_object.write(linen)

        # finditer() walks every hit in the whole response and exposes the
        # named groups directly, so no second search per hit is needed.
        for foundresult in site_score.finditer(fd):
            site = foundresult.group("site")
            score = foundresult.group("score")
            print(site + score)
            file_object.write('\t' + site + '\t' + score)

        # Terminate this record's row.
        file_object.write('\n')

入门学习链接:

python 自动填写表单xml解析: http://blog.chinaunix.net/uid-22340094-id-3059345.html
保存内容到本地 : http://jingyan.baidu.com/article/ed2a5d1f1b78ff09f6be178f.html

逐行读取文件 : http://www.cnblogs.com/sysuoyj/archive/2012/03/14/2395789.html
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python提交表单