您的位置:首页 > 编程语言 > Python开发

python基础(xml,html,bs4)

2015-12-01 16:40 429 查看
http://python-data.dr-chuck.net/comments_42.xml

<commentinfo><note>This file contains the sample data for testing</note><comments><comment><name>Leven</name><count>100</count></comment><comment><name>Mahdiya</name><count>97</count></comment><comment><name>Ajayraj</name><count>87</count></comment><comment><name>Lillian</name><count>86</count></comment><comment><name>Aon</name><count>86</count></comment><comment><name>Ruaraidh</name><count>78</count></comment><comment><name>Gursees</name><count>75</count></comment><comment><name>Emmanuel</name><count>74</count></comment><comment><name>Christy</name><count>72</count></comment><comment><name>Annoushka</name><count>72</count></comment><comment><name>Inara</name><count>72</count></comment><comment><name>Caite</name><count>70</count></comment><comment><name>Rosangel</name><count>70</count></comment><comment><name>Iana</name><count>66</count></comment><comment><name>Anise</name><count>66</count></comment><comment><name>Jaosha</name><count>65</count></comment><comment><name>Cadyn</name><count>65</count></comment><comment><name>Edward</name><count>63</count></comment><comment><name>Charlotte</name><count>61</count></comment><comment><name>Sammy</name><count>60</count></comment><comment><name>Zarran</name><count>60</count></comment><comment><name>Rowen</name><count>59</count></comment><comment><name>Stanislaw</name><count>59</count></comment><comment><name>Maighdlin</name><count>57</count></comment><comment><name>Connan</name><count>56</count></comment><comment><name>Warrick</name><count>54</count></comment><comment><name>Diya</name><count>52</count></comment><comment><name>Lawson</name><count>52</count></comment><comment><name>Wu</name><count>51</count></comment><comment><name>Irmak</name><count>47</count></comment><comment><name>Emilija</name><count>47</count></comment><comment><name>Kayda</name><count>41</count></comment><comment><name>Ellenor</name><count>41</count></comment><comment><name>Kyra</name><count>41</count></comment><comment><name>Nikita</name><count>38</coun
t></comment><comment><name>Kaelah</name><count>35</count></comment><comment><name>Meko</name><count>32</count></comment><comment><name>Marissa</name><count>31</count></comment><comment><name>Ayat</name><count>24</count></comment><comment><name>Sali</name><count>19</count></comment><comment><name>Hashem</name><count>19</count></comment><comment><name>Tygan</name><count>18</count></comment><comment><name>Rioden</name><count>17</count></comment><comment><name>Cruiz</name><count>16</count></comment><comment><name>Caoilfinn</name><count>13</count></comment><comment><name>Ewen</name><count>8</count></comment><comment><name>Baighley</name><count>7</count></comment><comment><name>Ramone</name><count>1</count></comment><comment><name>Kyran</name><count>1</count></comment><comment><name>Noelani</name><count>1</count></comment></comments></commentinfo>


import xml.etree.ElementTree as ET
import urllib2
def sum_comment_counts(xml_text):
    """Return the sum of all <count> values under comments/comment.

    Args:
        xml_text: an XML document whose root contains
            ``<comments><comment><count>N</count></comment>...</comments>``.

    Returns:
        The integer total of every ``<count>``; 0 when there are no comments.
    """
    root = ET.fromstring(xml_text)
    return sum(int(node.find('count').text)
               for node in root.findall('comments/comment'))


if __name__ == '__main__':
    # Fetch the sample document and always release the connection,
    # even when the read fails.
    response = urllib2.urlopen('http://python-data.dr-chuck.net/comments_42.xml')
    try:
        payload = response.read()
    finally:
        response.close()
    print(sum_comment_counts(payload))  # 2553


from xml.dom import minidom
import os
import glob
import cv2

"""
<annotation>
"""
def InitVoc(vocfile):
    """Build an <annotation> document for *vocfile* and write it as XML.

    The output path is *vocfile* up to its first dot, plus ``.xml``.

    Args:
        vocfile: path of the image file the annotation describes.
    """
    doc = minidom.Document()
    annotation = doc.createElement("annotation")
    doc.appendChild(annotation)
    addObject(doc, annotation, vocfile)
    # ``open`` replaces the Python-2-only ``file`` builtin, and the context
    # manager closes the handle even if ``writexml`` raises.
    with open(vocfile.split('.')[0] + ".xml", "w") as f:
        doc.writexml(f)

def addObject(doc, annotation, vocfile):
    """Fill *annotation* with VOC2007-style metadata and boxes for *vocfile*.

    Appends, in this order, <folder>, <filename>, <source>, <owner>, <size>
    and <segmented>, followed by one <object> for every usable entry found in
    the ``.txs`` file that shares *vocfile*'s name up to its first dot.

    Args:
        doc: the ``minidom.Document`` used to create elements and text nodes.
        annotation: the root <annotation> element that receives the children.
        vocfile: path of the image; it is loaded with ``cv2.imread`` to obtain
            the values written to <size>.

    Raises:
        IOError: if ``cv2.imread`` cannot load *vocfile*, or the ``.txs``
            file cannot be opened.
    """

    def add_text(parent, tag, text):
        """Append <tag>text</tag> to *parent* and return the new element."""
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(text))
        parent.appendChild(node)
        return node

    # <folder>VOC2007</folder>
    add_text(annotation, "folder", "VOC2007")

    # <filename>XXXXXX</filename>
    add_text(annotation, "filename", vocfile)

    # <source> with <database>, <annotation>, <image>, <flickrid>
    source = doc.createElement("source")
    annotation.appendChild(source)
    add_text(source, "database", "The VOC2007 Database")
    add_text(source, "annotation", "PASCAL VOC2007")
    add_text(source, "image", "flickr")
    add_text(source, "flickrid", "wang")

    # <owner> with <flickrid> and <name>.  The second child used to be
    # created with the tag "image"; the documented layout calls it <name>.
    owner = doc.createElement("owner")
    annotation.appendChild(owner)
    add_text(owner, "flickrid", "wang")
    add_text(owner, "name", "wang")

    # <size> with <width>, <height>, <depth>.  The image is read from the
    # *vocfile* parameter rather than from a global loop variable.
    img = cv2.imread(vocfile)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("cannot read image: %s" % vocfile)
    rows = len(img)
    cols = len(img[0])
    size = doc.createElement("size")
    annotation.appendChild(size)
    add_text(size, "width", str(cols))
    add_text(size, "height", str(rows))
    add_text(size, "depth", str(img[0][0].size))

    # <segmented>0</segmented>
    add_text(annotation, "segmented", "0")

    # One <object> (name, pose, truncated, difficult, bndbox) per entry.
    with open(vocfile.split('.')[0] + '.txs') as txs:
        raw = txs.read()
    # Keep only characters whose repr() does not need a \x escape.
    strs = "".join(ch for ch in raw if '\\x' not in repr(ch))

    for line in strs.split(' '):
        elem = line.split('{')
        if len(elem) < 2:
            continue
        nums = elem[1].split(',')
        if len(nums) != 4:
            continue
        # NOTE(review): nums[0]/nums[2] are emitted below as xmin/xmax but are
        # bounded here by the row count, and nums[1]/nums[3] (ymin/ymax) by
        # the column count -- confirm the axis order of the .txs format.
        inside = (len(elem[0]) == 1
                  and rows > int(nums[0])
                  and rows >= int(nums[2])
                  and cols >= int(nums[3])
                  and cols > int(nums[1]))
        if not inside:
            continue

        obj = doc.createElement("object")
        annotation.appendChild(obj)
        add_text(obj, "name", elem[0])
        add_text(obj, "pose", "Left")
        add_text(obj, "truncated", "1")
        add_text(obj, "difficult", "0")
        bndbox = doc.createElement("bndbox")
        obj.appendChild(bndbox)
        add_text(bndbox, "xmin", nums[0])
        add_text(bndbox, "ymin", nums[1])
        add_text(bndbox, "xmax", nums[2])
        add_text(bndbox, "ymax", nums[3])

# Batch driver: switch to the source directory and emit one annotation XML
# for every *.jpg found there.
os.chdir("E:\\shared\\Format_Trans_20160328\\src_txs")
bmpfiles = glob.glob("*.jpg")
for bmpfile in bmpfiles:
    InitVoc(bmpfile)


from xml.dom.minidom import *
import struct

class PltHeader(object):
    """Plain container for the header fields of one .plt sample.

    Attributes are stored exactly as given:
        uSize: value stored as the record size.
        uCharNum: value stored as the character count.
        uText: list of values for the label text.  The constructor parameter
            is spelled ``uTest``; the attribute is ``uText``.
        uStrokeNum: value stored as the stroke count.
        reserve: value stored in the reserved slot.

    Every parameter is optional.  When ``uTest`` is omitted each instance
    gets its own fresh list, so instances never share label storage.
    """

    def __init__(self, uSize=0, uCharNum=0, uTest=None, uStrokeNum=0, reserve=0):
        self.uSize = uSize
        self.uCharNum = uCharNum
        # A caller-supplied list is kept by reference (not copied).
        self.uText = [] if uTest is None else uTest
        self.uStrokeNum = uStrokeNum
        self.reserve = reserve

    def __repr__(self):
        return "PltHeader(uSize=%r, uCharNum=%r, uText=%r, uStrokeNum=%r, reserve=%r)" % (
            self.uSize, self.uCharNum, self.uText, self.uStrokeNum, self.reserve)

def label_code_points(label):
    """Return the code point of every character of *label* except spaces.

    This replaces parsing ``repr(label)`` split on ``\\u``, which only works
    when every character is printed as a ``\\uXXXX`` escape.
    """
    return [ord(ch) for ch in label if ch != ' ']


def trace_words(trace_texts):
    """Flatten trace strings into the 16-bit word stream of a .plt body.

    Each trace is a comma-separated list of ``"x y"`` pairs.  Every pair
    contributes two words, every trace is followed by ``65535, 0`` and the
    whole stream ends with ``65535, 65535``.

    Args:
        trace_texts: iterable of trace strings.

    Returns:
        ``(words, point_count)`` -- the flat list of ints and the number of
        coordinate pairs it contains.
    """
    words = []
    point_count = 0
    for text in trace_texts:
        for coordinate in text.split(','):
            # split() without an argument tolerates padding around a pair.
            x, y = coordinate.split()[:2]
            words.append(int(x))
            words.append(int(y))
            point_count += 1
        words.extend((65535, 0))
    words.extend((65535, 65535))
    return words, point_count


if __name__ == '__main__':
    my_plt = PltHeader(0, 0, [], 0, 0)
    # ``parse`` is the name the star import above actually provides.
    dom1 = parse("E:/ADAB_set/ADAB_set/set_2/inkml/1233225548643.inkml")
    dom2 = parse("E:/ADAB_set/ADAB_set/set_2/upx/1233225548643.upx")
    itemlist1 = dom1.documentElement.getElementsByTagName('trace')
    itemText = dom2.documentElement.getElementsByTagName("alternate")

    my_plt.uText.extend(label_code_points(itemText[0].getAttribute("value")))
    my_plt.uStrokeNum = len(itemlist1)
    my_plt.uCharNum = len(my_plt.uText)

    # The header is packed as 'HH258H': 256 label words (zero padded), then
    # the stroke count, then the reserved word.  The stroke count used to be
    # missing, which left the pack call one value short.
    my_plt.uText.extend([0] * (256 - my_plt.uCharNum))
    my_plt.uText.append(my_plt.uStrokeNum)
    my_plt.uText.append(my_plt.reserve)

    nums, count = trace_words(item.firstChild.data for item in itemlist1)

    # 520 header bytes + 4 bytes per point, per stroke end, and for the
    # final end marker.
    my_plt.uSize = 520 + (my_plt.uStrokeNum + 1 + count) * 2 * 2

    with open("C:/Users/samsung/Desktop/1.plt", "w+b") as binfile:
        binfile.write(struct.pack('HH258H', my_plt.uSize, my_plt.uCharNum, *my_plt.uText))
        binfile.write(struct.pack('%dH' % len(nums), *nums))
    print("%s %s %s %s" % (my_plt.uStrokeNum, my_plt.uSize, my_plt.uText, my_plt.uCharNum))
    print(len(nums))


from xml.dom.minidom import *
import struct
import os

def pack_plt_sample(label, trace_texts, reserve=0):
    """Return the binary .plt record for one handwriting sample.

    Layout (all unsigned 16-bit words, native byte order):
        size, char_count, 256 label words (zero padded), stroke_count,
        reserve, then for every trace its ``x y`` pairs followed by
        ``65535, 0``, and finally ``65535, 65535``.

    Args:
        label: text of the sample; spaces are not stored.
        trace_texts: iterable of strings, each a comma-separated list of
            ``"x y"`` coordinate pairs.
        reserve: value of the reserved header word.

    Returns:
        The packed record as a byte string.

    Raises:
        struct.error: if the label has more than 256 stored characters or
            any value does not fit in 16 bits.
    """
    code_points = [ord(ch) for ch in label if ch != ' ']
    char_count = len(code_points)

    words = []
    stroke_count = 0
    point_count = 0
    for text in trace_texts:
        stroke_count += 1
        for coordinate in text.split(','):
            x, y = coordinate.split()[:2]
            words.append(int(x))
            words.append(int(y))
            point_count += 1
        words.extend((65535, 0))
    words.extend((65535, 65535))

    header = code_points + [0] * (256 - char_count) + [stroke_count, reserve]
    # 520 header bytes + 4 bytes per point, per stroke end, and for the
    # final end marker.
    size = 520 + (stroke_count + 1 + point_count) * 2 * 2
    return (struct.pack('HH258H', size, char_count, *header)
            + struct.pack('%dH' % len(words), *words))


if __name__ == '__main__':
    os.chdir("E:/ADAB_set/ADAB_set")
    reserve = 0

    base_dir = os.getcwd()
    # Only the first level of os.walk is used: the immediate sub-directories.
    set_names = next(os.walk(base_dir), (base_dir, [], []))[1]

    for set_name in set_names:
        inkml_dir = os.path.join(base_dir, set_name, "inkml")
        upx_dir = os.path.join(base_dir, set_name, "upx")
        # A missing inkml directory yields no files, as before.
        inkml_files = sorted(next(os.walk(inkml_dir), (inkml_dir, [], []))[2])

        with open("C:/Users/samsung/Desktop/%s.plt" % set_name, "ab") as binfile:
            for index, inkml_name in enumerate(inkml_files):
                # Pair the ink with its label by file stem instead of by list
                # position, which mismatches as soon as either directory
                # lists in a different order or misses a file.
                # NOTE(review): assumes <stem>.inkml pairs with <stem>.upx,
                # as in set_2/1233225548643.* -- confirm for the whole set.
                upx_path = os.path.join(
                    upx_dir, os.path.splitext(inkml_name)[0] + ".upx")
                if not os.path.isfile(upx_path):
                    print("skipping %s: no matching .upx file" % inkml_name)
                    continue

                domInkml = parse(os.path.join(inkml_dir, inkml_name))
                domUpx = parse(upx_path)
                itemListInkml = domInkml.documentElement.getElementsByTagName('trace')
                itemValue = domUpx.documentElement.getElementsByTagName("alternate")
                label = itemValue[0].getAttribute("value")

                print(repr(label))
                print(index)
                binfile.write(pack_plt_sample(
                    label,
                    [item.firstChild.data for item in itemListInkml],
                    reserve))


<html>
<head>
<title>People that Avah knows</title>
<style>
.overlay{
opacity:0.99;
background-color:#eee;
position:fixed;
width:100%;
height:100%;
top:0px;
left:0px;
z-index:1000;
}
</style>
</head>
<body>
<h1>People that Avah knows</h1>
<div class="overlay" id="overlay" style="display:none" >
<center>
<h2>
This screen randomly changes the height between list items and vanishes
after a while to make sure that you retrieve and process the data
in a Python program rather than simply counting down pressing links, and
and doing the assignment without writing a Python program :).
The names are in the same order in the HTML even though they
shift around on the screen visually.
Your Python program can look at the page as long as it likes.
</h2>
</center>
</div>
<ul>
<li style="margin-top: 7px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Daniyal.html">Daniyal</a></li>
<li style="margin-top: 4px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Fares.html">Fares</a></li>
<li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kynan.html">Kynan</a></li>
<li style="margin-top: 10px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Erika.html">Erika</a></li>
<li style="margin-top: 29px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Milly.html">Milly</a></li>
<li style="margin-top: 12px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ceara.html">Ceara</a></li>
<li style="margin-top: 1px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rennie.html">Rennie</a></li>
<li style="margin-top: 31px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Franco.html">Franco</a></li>
<li style="margin-top: 19px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Roxie.html">Roxie</a></li>
<li style="margin-top: 4px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Giyia.html">Giyia</a></li>
<li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zuzanna.html">Zuzanna</a></li>
<li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Calean.html">Calean</a></li>
<li style="margin-top: 26px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Robyn.html">Robyn</a></li>
<li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Sainabou.html">Sainabou</a></li>
<li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Maximilian.html">Maximilian</a></li>
<li style="margin-top: 36px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Katso.html">Katso</a></li>
<li style="margin-top: 9px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Armaan.html">Armaan</a></li>
<li style="margin-top: 23px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Keiva.html">Keiva</a></li>
<li style="margin-top: 8px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Eiko.html">Eiko</a></li>
<li style="margin-top: 28px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Salahudin.html">Salahudin</a></li>
<li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marwa.html">Marwa</a></li>
<li style="margin-top: 13px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Nodoka.html">Nodoka</a></li>
<li style="margin-top: 46px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Dhyia.html">Dhyia</a></li>
<li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lianne.html">Lianne</a></li>
<li style="margin-top: 5px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Tyree.html">Tyree</a></li>
<li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Diona.html">Diona</a></li>
<li style="margin-top: 35px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lliam.html">Lliam</a></li>
<li style="margin-top: 15px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Crystyn.html">Crystyn</a></li>
<li style="margin-top: 34px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Maca.html">Maca</a></li>
<li style="margin-top: 1px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marina.html">Marina</a></li>
<li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Leah.html">Leah</a></li>
<li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Avril.html">Avril</a></li>
<li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Riagan.html">Riagan</a></li>
<li style="margin-top: 13px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Jaying.html">Jaying</a></li>
<li style="margin-top: 59px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Emaly.html">Emaly</a></li>
<li style="margin-top: 26px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rheanne.html">Rheanne</a></li>
<li style="margin-top: 46px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Owais.html">Owais</a></li>
<li style="margin-top: 31px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aria.html">Aria</a></li>
<li style="margin-top: 8px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kyie.html">Kyie</a></li>
<li style="margin-top: 48px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Keryis.html">Keryis</a></li>
<li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marcous.html">Marcous</a></li>
<li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Catrin.html">Catrin</a></li>
<li style="margin-top: 54px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marcelina.html">Marcelina</a></li>
<li style="margin-top: 52px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Madeline.html">Madeline</a></li>
<li style="margin-top: 21px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Patrikas.html">Patrikas</a></li>
<li style="margin-top: 66px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lacey.html">Lacey</a></li>
<li style="margin-top: 57px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Eason.html">Eason</a></li>
<li style="margin-top: 9px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kyrillos.html">Kyrillos</a></li>
<li style="margin-top: 16px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Benjamin.html">Benjamin</a></li>
<li style="margin-top: 34px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ege.html">Ege</a></li>
<li style="margin-top: 39px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Marwan.html">Marwan</a></li>
<li style="margin-top: 6px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Celik.html">Celik</a></li>
<li style="margin-top: 42px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kendal.html">Kendal</a></li>
<li style="margin-top: 18px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kasja.html">Kasja</a></li>
<li style="margin-top: 22px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Asena.html">Asena</a></li>
<li style="margin-top: 12px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Harris.html">Harris</a></li>
<li style="margin-top: 79px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Si.html">Si</a></li>
<li style="margin-top: 56px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lumi.html">Lumi</a></li>
<li style="margin-top: 33px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Chevy.html">Chevy</a></li>
<li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Famara.html">Famara</a></li>
<li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Cara.html">Cara</a></li>
<li style="margin-top: 80px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Elisa.html">Elisa</a></li>
<li style="margin-top: 23px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Nihaal.html">Nihaal</a></li>
<li style="margin-top: 76px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Vivian.html">Vivian</a></li>
<li style="margin-top: 73px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Farrah.html">Farrah</a></li>
<li style="margin-top: 69px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Shonagh.html">Shonagh</a></li>
<li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Monty.html">Monty</a></li>
<li style="margin-top: 80px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Turner.html">Turner</a></li>
<li style="margin-top: 6px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Oliver.html">Oliver</a></li>
<li style="margin-top: 24px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aayan.html">Aayan</a></li>
<li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Atom.html">Atom</a></li>
<li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Abby.html">Abby</a></li>
<li style="margin-top: 68px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Makala.html">Makala</a></li>
<li style="margin-top: 79px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Rupert.html">Rupert</a></li>
<li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Aine.html">Aine</a></li>
<li style="margin-top: 50px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Feden.html">Feden</a></li>
<li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Annick.html">Annick</a></li>
<li style="margin-top: 44px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Avah.html">Avah</a></li>
<li style="margin-top: 72px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Emilie.html">Emilie</a></li>
<li style="margin-top: 47px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Autumn.html">Autumn</a></li>
<li style="margin-top: 25px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Edyn.html">Edyn</a></li>
<li style="margin-top: 43px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Efe.html">Efe</a></li>
<li style="margin-top: 29px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Kie.html">Kie</a></li>
<li style="margin-top: 105px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Dougal.html">Dougal</a></li>
<li style="margin-top: 58px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Carolyn.html">Carolyn</a></li>
<li style="margin-top: 30px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Lana.html">Lana</a></li>
<li style="margin-top: 54px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Miryn.html">Miryn</a></li>
<li style="margin-top: 32px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Pearce.html">Pearce</a></li>
<li style="margin-top: 73px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ash.html">Ash</a></li>
<li style="margin-top: 49px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zahide.html">Zahide</a></li>
<li style="margin-top: 76px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Mathias.html">Mathias</a></li>
<li style="margin-top: 53px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Vaila.html">Vaila</a></li>
<li style="margin-top: 104px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Abbey.html">Abbey</a></li>
<li style="margin-top: 64px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Ayooluwa.html">Ayooluwa</a></li>
<li style="margin-top: 117px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Miriam.html">Miriam</a></li>
<li style="margin-top: 83px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Levon.html">Levon</a></li>
<li style="margin-top: 3px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Colin.html">Colin</a></li>
<li style="margin-top: 65px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Noah.html">Noah</a></li>
<li style="margin-top: 70px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Taegan.html">Taegan</a></li>
<li style="margin-top: 122px;"><a href="https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Zennon.html">Zennon</a></li>
</ul>
<script>
// http://stackoverflow.com/questions/20423322/simple-setting-off-display-none-block-with-javascript
function showHide(id) {
var el = document.getElementById(id);
if( el && el.style.display == 'none')
el.style.display = 'block';
else
el.style.display = 'none';
}
setTimeout('showHide("overlay");', 2500);

</script>
</body>
</html>


import urllib2
from bs4 import BeautifulSoup

# Follow one link per page, a fixed number of times, printing each link text.
LINK_POSITION = 17  # zero-based index of the <a> tag to follow on each page
REPEAT_COUNT = 7    # number of pages to fetch

url = raw_input()

for i in range(REPEAT_COUNT):
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')
    tags = soup('a')
    if len(tags) <= LINK_POSITION:
        raise IndexError("%s has only %d links; need index %d"
                         % (url, len(tags), LINK_POSITION))
    target = tags[LINK_POSITION]
    print(target.string)
    url = target.get('href', None)
    if url is None:
        raise ValueError("link %d has no href attribute" % LINK_POSITION)




<html>
<head>
<title>Welcome to the comments assignment from www.pythonlearn.com</title>
</head>
<body>
<h1>This file contains the sample data for testing</h1>

<table border="2">
<tr>
<td>Name</td><td>Comments</td>
</tr>
<tr><td>Leven</td><td><span class="comments">100</span></td></tr>
<tr><td>Mahdiya</td><td><span class="comments">97</span></td></tr>
<tr><td>Ajayraj</td><td><span class="comments">87</span></td></tr>
<tr><td>Lillian</td><td><span class="comments">86</span></td></tr>
<tr><td>Aon</td><td><span class="comments">86</span></td></tr>
<tr><td>Ruaraidh</td><td><span class="comments">78</span></td></tr>
<tr><td>Gursees</td><td><span class="comments">75</span></td></tr>
<tr><td>Emmanuel</td><td><span class="comments">74</span></td></tr>
<tr><td>Christy</td><td><span class="comments">72</span></td></tr>
<tr><td>Annoushka</td><td><span class="comments">72</span></td></tr>
<tr><td>Inara</td><td><span class="comments">72</span></td></tr>
<tr><td>Caite</td><td><span class="comments">70</span></td></tr>
<tr><td>Rosangel</td><td><span class="comments">70</span></td></tr>
<tr><td>Iana</td><td><span class="comments">66</span></td></tr>
<tr><td>Anise</td><td><span class="comments">66</span></td></tr>
<tr><td>Jaosha</td><td><span class="comments">65</span></td></tr>
<tr><td>Cadyn</td><td><span class="comments">65</span></td></tr>
<tr><td>Edward</td><td><span class="comments">63</span></td></tr>
<tr><td>Charlotte</td><td><span class="comments">61</span></td></tr>
<tr><td>Sammy</td><td><span class="comments">60</span></td></tr>
<tr><td>Zarran</td><td><span class="comments">60</span></td></tr>
<tr><td>Rowen</td><td><span class="comments">59</span></td></tr>
<tr><td>Stanislaw</td><td><span class="comments">59</span></td></tr>
<tr><td>Maighdlin</td><td><span class="comments">57</span></td></tr>
<tr><td>Connan</td><td><span class="comments">56</span></td></tr>
<tr><td>Warrick</td><td><span class="comments">54</span></td></tr>
<tr><td>Diya</td><td><span class="comments">52</span></td></tr>
<tr><td>Lawson</td><td><span class="comments">52</span></td></tr>
<tr><td>Wu</td><td><span class="comments">51</span></td></tr>
<tr><td>Irmak</td><td><span class="comments">47</span></td></tr>
<tr><td>Emilija</td><td><span class="comments">47</span></td></tr>
<tr><td>Kayda</td><td><span class="comments">41</span></td></tr>
<tr><td>Ellenor</td><td><span class="comments">41</span></td></tr>
<tr><td>Kyra</td><td><span class="comments">41</span></td></tr>
<tr><td>Nikita</td><td><span class="comments">38</span></td></tr>
<tr><td>Kaelah</td><td><span class="comments">35</span></td></tr>
<tr><td>Meko</td><td><span class="comments">32</span></td></tr>
<tr><td>Marissa</td><td><span class="comments">31</span></td></tr>
<tr><td>Ayat</td><td><span class="comments">24</span></td></tr>
<tr><td>Sali</td><td><span class="comments">19</span></td></tr>
<tr><td>Hashem</td><td><span class="comments">19</span></td></tr>
<tr><td>Tygan</td><td><span class="comments">18</span></td></tr>
<tr><td>Rioden</td><td><span class="comments">17</span></td></tr>
<tr><td>Cruiz</td><td><span class="comments">16</span></td></tr>
<tr><td>Caoilfinn</td><td><span class="comments">13</span></td></tr>
<tr><td>Ewen</td><td><span class="comments">8</span></td></tr>
<tr><td>Baighley</td><td><span class="comments">7</span></td></tr>
<tr><td>Ramone</td><td><span class="comments">1</span></td></tr>
<tr><td>Kyran</td><td><span class="comments">1</span></td></tr>
<tr><td>Noelani</td><td><span class="comments">1</span></td></tr>
</table>
</body>
</html>


import urllib2
from bs4 import BeautifulSoup

# Read a URL from stdin, then print how many <span> tags the page has and
# the sum of their integer contents.
url = raw_input()
response = urllib2.urlopen(url)
try:
    html = response.read()
finally:
    response.close()
# Name the parser explicitly so the result does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(html, 'html.parser')
tags = soup('span')
count = len(tags)
# ``total`` instead of ``sum`` keeps the builtin usable.
total = sum(int(tag.string) for tag in tags)
print("%d %d" % (count, total))
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: