您的位置:首页 > 编程语言 > Python开发

python urllib模块里面一些函数

2011-04-14 13:26 495 查看
urllib 里面的一些函数,发现挺有用,特记在这里,以备日后使用。

# splittype('type:opaquestring') --> 'type', 'opaquestring'

# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'

# splitpasswd('user:passwd') -> 'user', 'passwd'

# splitport('host:port') --> 'host', 'port'

# splitquery('/path?query') --> '/path', 'query'

# splittag('/path#tag') --> '/path', 'tag'

# splitattr('/path;attr1=value1;attr2=value2;...') ->

# '/path', ['attr1=value1', 'attr2=value2', ...]

# splitvalue('attr=value') --> 'attr', 'value'

# Lazily compiled pattern for splittype(); None until the first call.
_typeprog = None


def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    Split a leading scheme off *url*.

    Returns a ``(scheme, rest)`` pair.  The scheme is lower-cased and the
    separating ':' is dropped.  When *url* does not start with one or more
    characters other than '/' and ':' followed by a ':', the result is
    ``(None, url)``.
    """
    global _typeprog
    if _typeprog is None:
        # Imported and compiled on first use only.
        import re
        _typeprog = re.compile(r'^([^/:]+):')

    match = _typeprog.match(url)
    if match:
        scheme = match.group(1)
        # match.end() is the index just past the ':' separator.
        return scheme.lower(), url[match.end():]
    return None, url

# Lazily compiled pattern for splithost(); None until the first call.
_hostprog = None


def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Split the network location off a URL that starts with '//'.

    Returns ``(host, rest)`` where *host* is everything between the leading
    '//' and the first '/', '?' or '#', and *rest* is the remainder of the
    string (possibly empty).  If *url* does not start with '//', returns
    ``(None, url)``.
    """
    global _hostprog
    if _hostprog is None:
        import re
        # '#' also terminates the host, so a fragment cannot be mistaken for
        # part of it.  DOTALL lets the remainder contain newlines instead of
        # making the whole match fail.
        _hostprog = re.compile(r'^//([^/#?]*)(.*)$', re.DOTALL)

    match = _hostprog.match(url)
    if match:
        return match.group(1, 2)
    return None, url

# Lazily compiled pattern for splituser(); None until the first call.
_userprog = None


def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Split the user-info part off a network location.

    The split happens at the last '@'.  Both parts are percent-decoded with
    ``unquote`` and returned as a ``(userinfo, hostport)`` tuple.  If there
    is no '@', returns ``(None, host)`` with *host* unchanged.
    """
    global _userprog
    # Local import so the function is self-contained; Python 3 location
    # first, Python 2 location as fallback.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote

    if _userprog is None:
        import re
        # Greedy first group: everything up to the last '@'.
        _userprog = re.compile(r'^(.*)@(.*)$')

    match = _userprog.match(host)
    if match:
        # Build a real tuple; a bare map() is a lazy iterator on Python 3.
        return tuple(unquote(part) for part in match.group(1, 2))
    return None, host

# Lazily compiled pattern for splitpasswd(); None until the first call.
_passwdprog = None


def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    Split a 'user:passwd' string at the first ':'.

    Returns ``(user, passwd)``; the password may itself contain ':' or
    newlines.  If there is no ':', returns ``(user, None)``.
    """
    global _passwdprog
    if _passwdprog is None:
        import re
        # re.S lets the password group span newlines.
        _passwdprog = re.compile(r'^([^:]*):(.*)$', re.S)

    match = _passwdprog.match(user)
    if match:
        return match.group(1, 2)
    return user, None

# splitport('host:port') --> 'host', 'port'

# Lazily compiled pattern for splitport(); None until the first call.
_portprog = None


def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    Split a trailing ':port' off *host*.

    Returns ``(host, port)`` with *port* as a string of digits when the
    text after the last ':' is a non-empty run of digits.  A trailing ':'
    with nothing after it is dropped and reported as no port:
    ``('host', None)``.  In every other case returns ``(host, None)`` with
    *host* unchanged.
    """
    global _portprog
    if _portprog is None:
        import re
        # '*' rather than '+': an empty port ("host:") must still match so
        # the dangling ':' can be stripped from the host.
        _portprog = re.compile(r'^(.*):([0-9]*)$')

    match = _portprog.match(host)
    if match:
        hostname, port = match.group(1, 2)
        if port:
            return hostname, port
        # Empty port: keep the host without the trailing ':'.
        return hostname, None
    return host, None

# Lazily compiled pattern for splitnport(); None until the first call.
_nportprog = None


def splitnport(host, defport=-1):
    """Split host and port, returning a numeric port.

    Return ``(host, defport)`` if no ':' is found; *defport* defaults to -1.
    Return ``(host, int(port))`` if a valid number is found after the last
    ':'.
    Return ``(host, None)`` if there is a ':' but what follows it is empty
    or not a valid number.
    """
    global _nportprog
    if _nportprog is None:
        import re
        # Greedy first group: the split happens at the last ':'.
        _nportprog = re.compile(r'^(.*):(.*)$')

    match = _nportprog.match(host)
    if not match:
        return host, defport

    host, port = match.group(1, 2)
    try:
        # int('') raises ValueError too, so an empty port needs no special
        # case.
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport

# Lazily compiled pattern for splitquery(); None until the first call.
_queryprog = None


def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    Split the query string off *url* at the last '?'.

    Returns ``(path, query)`` with the '?' removed; *query* may be empty.
    If *url* contains no '?', returns ``(url, None)``.
    """
    global _queryprog
    if _queryprog is None:
        import re
        # Raw string: '\?' in a normal string literal is an invalid escape
        # sequence.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None

# Lazily compiled pattern for splittag(); None until the first call.
_tagprog = None


def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    Split the fragment ("tag") off *url* at the last '#'.

    Returns ``(path, tag)`` with the '#' removed; *tag* may be empty.
    If *url* contains no '#', returns ``(url, None)``.
    """
    global _tagprog
    if _tagprog is None:
        import re
        # Greedy first group + '[^#]*': the split happens at the last '#'.
        _tagprog = re.compile(r'^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match:
        return match.group(1, 2)
    return url, None

def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
    '/path', ['attr1=value1', 'attr2=value2', ...].

    Split *url* on ';'.

    Returns ``(path, attrs)`` where *path* is the text before the first ';'
    and *attrs* is the list of the remaining ';'-separated pieces (an empty
    list when *url* contains no ';').
    """
    words = url.split(';')
    return words[0], words[1:]

# Lazily compiled pattern for splitvalue(); None until the first call.
_valueprog = None


def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    Split an 'attr=value' string at the first '='.

    Returns ``(attr, value)``; *value* may be empty or contain further '='
    characters.  If there is no '=', returns ``(attr, None)``.
    """
    global _valueprog
    if _valueprog is None:
        import re
        # '[^=]*' for the name: the split happens at the first '='.
        _valueprog = re.compile(r'^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match:
        return match.group(1, 2)
    return attr, None
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: