您的位置:首页 > 编程语言 > Python开发

Python学习之内建模块(5):HTMLParser

2016-03-16 17:56 495 查看

下面的代码用于解析 Python 官网的 HTML 源码,从中提取我们需要的 Python 会议信息:名称、时间和地点。

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from html.parser import HTMLParser
from html.entities import name2codepoint

class MyHTMLParser(HTMLParser):
    """Print conference records (title, time, location) found in HTML.

    The parser is a small state machine driven by ``self._mark``:

    * ``0`` -- not capturing text,
    * ``1`` -- an element carrying ``event-title`` was opened,
    * ``2`` -- a ``<time>`` element was opened,
    * ``3`` -- an element carrying ``event-location`` was opened.

    Output is written to stdout with ``print``; nothing is returned.
    """

    def __init__(self):
        super().__init__()
        self._mark = 0

    @staticmethod
    def _has_marker(attrs, marker):
        """Return True if any (name, value) attribute pair contains *marker*.

        The comparison is an exact match against the attribute name or the
        whole attribute value, checked on every attribute rather than only
        the first one.
        """
        return any(marker in attr for attr in attrs)

    def handle_starttag(self, tag, attrs):
        """Switch capture state and print the field label for known tags.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` pairs; may be empty, and a
                value may be ``None`` for attributes written without one.
        """
        if self._has_marker(attrs, 'event-title'):
            self._mark = 1
            print('{\ntitle:', end='')
        elif tag == 'time':
            self._mark = 2
            # The first attribute's value is printed as the timestamp.
            # Guard against a bare <time> and against a valueless attribute
            # so neither case raises.
            first_value = attrs[0][1] if attrs else None
            print('time: %s' % (first_value or '').strip(), end='')
        elif self._has_marker(attrs, 'event-location'):
            self._mark = 3
            print('location:', end='')

    def handle_data(self, data):
        """Print text while capturing; close the record after a location.

        NOTE(review): ``_mark %= 3`` only resets state 3. States 1 and 2
        persist, so every following text chunk (including whitespace-only
        ones) is printed until the next recognised start tag. Kept as is to
        preserve the existing output format.
        """
        if self._mark != 0:
            print(data.strip(), '\n}' if self._mark == 3 else '')
            self._mark %= 3

    def handle_endtag(self, tag):
        """No-op: end tags do not affect the capture state."""
        pass

    def handle_startendtag(self, tag, attrs):
        """No-op: self-closing tags (``<x/>``) are deliberately ignored."""
        pass

    def handle_comment(self, data):
        """No-op: comments are ignored."""
        pass

    def handle_entityref(self, name):
        """No-op: named character references are ignored."""
        pass

    def handle_charref(self, name):
        """No-op: numeric character references are ignored."""
        pass

# Read the saved page source. The original code dropped the first character
# unconditionally -- presumably to discard a UTF-8 byte-order mark (TODO
# confirm). 'utf-8-sig' removes a BOM only when one is actually present, so
# a file without a BOM no longer loses its first real character.
with open(r'C:\Users\admin\Desktop\test.txt', 'r', encoding='utf-8-sig') as f:
    s = f.read()

parser = MyHTMLParser()
parser.feed(s)
# Flush any text still buffered inside the parser at end of input.
parser.close()
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: