您的位置:首页 > 编程语言 > Python开发

python 判断unicode字符串是汉字/数字/字母,全角/半角转换

2013-06-17 13:55 891 查看
文本处理中经常会遇到需要判断字符串是否为字母、数字或者汉字的问题。Python 的 str 类型提供了一些相关方法,但是不支持 unicode,所以模仿 str 的相关方法,实现了 unicode 字符串的汉字/数字/字母判断方法,以及全角/半角转换。

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import sys

#set default encoding as UTF-8

#judge a character is a Chinese Character
def is_Chinese(uchar):
    """Return True if *uchar* is a single character in U+4E00..U+9FA5.

    Args:
        uchar: a unicode string of length exactly 1.

    Returns:
        True when the character lies in the inclusive range
        U+4E00..U+9FA5, otherwise False.

    Raises:
        TypeError: if ``len(uchar) != 1``.
    """
    if len(uchar) != 1:
        # Call-style raise: valid on both Python 2 and Python 3, unlike the
        # old ``raise TypeError, 'msg'`` statement form.
        raise TypeError('expected a character, but a string found!')

    return u'\u4e00' <= uchar <= u'\u9fa5'

#Judge a ustr is all Chinese
def is_all_Chinese(ustr):
    """Return True if every character of *ustr* passes ``is_Chinese``.

    Iteration stops at the first character that fails the check. An empty
    *ustr* yields True because there is no failing character.
    """
    return all(is_Chinese(ch) for ch in ustr)

#Judge a char is a number
def is_digit(uchar):
    """Return True if *uchar* is a single ASCII digit (U+0030..U+0039).

    Args:
        uchar: a unicode string of length exactly 1.

    Returns:
        True for the characters '0' through '9', otherwise False.

    Raises:
        TypeError: if ``len(uchar) != 1``.
    """
    if len(uchar) != 1:
        # Call-style raise: valid on both Python 2 and Python 3, unlike the
        # old ``raise TypeError, 'msg'`` statement form.
        raise TypeError('expected a character, but a string found!')

    return u'\u0030' <= uchar <= u'\u0039'

#Judge a str is all num
def is_all_digit(ustr):
    """Return True if every character of *ustr* passes ``is_digit``.

    Iteration stops at the first character that fails the check. An empty
    *ustr* yields True because there is no failing character.
    """
    return all(is_digit(ch) for ch in ustr)

#Judge a char is a alphabet
def is_alpha(uchar):
    """Return True if *uchar* is a single ASCII letter (A-Z or a-z).

    Args:
        uchar: a unicode string of length exactly 1.

    Returns:
        True when the character lies in U+0041..U+005A or U+0061..U+007A,
        otherwise False.

    Raises:
        TypeError: if ``len(uchar) != 1``.
    """
    if len(uchar) != 1:
        # Call-style raise: valid on both Python 2 and Python 3, unlike the
        # old ``raise TypeError, 'msg'`` statement form.
        raise TypeError('expected a character, but a string found!')

    is_upper = u'\u0041' <= uchar <= u'\u005a'
    is_lower = u'\u0061' <= uchar <= u'\u007a'
    return is_upper or is_lower

#Judge a str is all alphabet
def is_all_alpha(ustr):
    """Return True if every character of *ustr* passes ``is_alpha``.

    Iteration stops at the first character that fails the check. An empty
    *ustr* yields True because there is no failing character.
    """
    return all(is_alpha(ch) for ch in ustr)

def B2Q(uchar):
    """Convert one half-width character to its full-width counterpart.

    Characters outside the range U+0020..U+007E are returned unchanged.
    The space (U+0020) maps to the ideographic space U+3000; every other
    character in the range is shifted up by 0xFEE0.

    Args:
        uchar: a unicode string of length exactly 1.

    Returns:
        A unicode string of length 1.

    Raises:
        TypeError: if ``len(uchar) != 1``.
    """
    if len(uchar) != 1:
        # Call-style raise: valid on both Python 2 and Python 3.
        raise TypeError('expected a character, but a string found!')

    # ``unichr`` only exists on Python 2; on Python 3 ``chr`` covers the
    # full Unicode range.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    inner_code = ord(uchar)
    if inner_code < 0x0020 or inner_code > 0x7e:
        # Not a half-width character: return it untouched.
        return uchar

    if inner_code == 0x0020:
        # Space is the one exception to the fixed-offset rule.
        inner_code = 0x3000
    else:
        # For everything else: full-width = half-width + 0xFEE0.
        # This must be an ``else`` branch; applying the offset to the
        # already-mapped space would produce U+12EE0 instead of U+3000.
        inner_code += 0xfee0

    return to_char(inner_code)

def Q2B(uchar):
    """Convert one full-width character to its half-width counterpart.

    The ideographic space U+3000 maps to the ASCII space U+0020; every
    other character is shifted down by 0xFEE0. If the shifted code does not
    land in U+0020..U+007E, the input character is returned unchanged.

    Args:
        uchar: a unicode string of length exactly 1.

    Returns:
        A unicode string of length 1.

    Raises:
        TypeError: if ``len(uchar) != 1``.
    """
    if len(uchar) != 1:
        # Call-style raise: valid on both Python 2 and Python 3.
        raise TypeError('expected a character, but a string found!')

    # ``unichr`` only exists on Python 2; on Python 3 ``chr`` covers the
    # full Unicode range.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    inner_code = ord(uchar)
    if inner_code == 0x3000:
        # Space is the one exception to the fixed-offset rule.
        inner_code = 0x0020
    else:
        # For everything else: half-width = full-width - 0xFEE0.
        # This must be an ``else`` branch; subtracting the offset from the
        # already-mapped space would go negative and leave U+3000
        # unconverted.
        inner_code -= 0xfee0

    if inner_code < 0x0020 or inner_code > 0x7e:
        # The result is not a half-width character: keep the original.
        return uchar

    return to_char(inner_code)

def stringQ2B(ustring):
    """Return *ustring* with each character passed through ``Q2B``.

    The converted characters are joined back together in their original
    order.
    """
    return ''.join(map(Q2B, ustring))

#main function
if __name__ == '__main__':
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息