您的位置:首页 > 编程语言 > Python开发

python 字符处理代码 1.0

2015-07-25 18:01 741 查看
#! /usr/bin/env python

# -*- coding: gbk -*-

import os

import string

class CBunchStrTools:
    """Assorted helpers for inspecting and normalising raw strings."""

    @staticmethod
    def _code_to_char(code):
        """Return the one-character text string for code point *code*.

        Works on both Python 2 (``unichr``) and Python 3 (``chr``).
        """
        try:
            return unichr(code)  # Python 2
        except NameError:
            return chr(code)  # Python 3 has no unichr

    @staticmethod
    def Atoa(num):
        """Format *num* with ``'%d'`` and return the resulting string."""
        return '%d' % num

    @staticmethod
    def strQ2B(ustring):
        """Convert the full-width characters of *ustring* to half-width.

        U+3000 becomes an ASCII space; every other character is shifted
        down by 0xFEE0.  When the shifted code is not in 0x20..0x7E the
        original character is kept unchanged.
        """
        pieces = []
        for uchar in ustring:
            code = ord(uchar)
            code = 0x0020 if code == 0x3000 else code - 0xfee0
            if 0x0020 <= code <= 0x7e:
                pieces.append(CBunchStrTools._code_to_char(code))
            else:
                # Not a full-width form: keep the character as it was.
                # (The shifted code may be negative here, so it must never
                # be passed to chr/unichr.)
                pieces.append(uchar)
        return "".join(pieces)

    @staticmethod
    def CheckCodeType(str):
        """Scan *str* and return a bit mask describing what was seen.

        * ``0x01`` - at least one unit equal to 10, 14 or 32, or an ASCII
          letter or digit.
        * ``0x02`` - at least one pair whose first unit is in 129..254 and
          whose second unit is in 64..254.

        *str* may be a text string (units are ``ord(ch)``) or a byte
        sequence (units are the byte values).  Scanning stops at the first
        unit that matches neither rule: the mask collected so far is
        returned if at least one more unit follows it, otherwise the
        offending unit is printed and 0 is returned.

        NOTE(review): the pair ranges look like GBK lead/trail byte ranges,
        and 14 may have been meant as 13 (CR) - confirm with the author.
        """
        # Normalise to integers so the same logic serves text and bytes.
        codes = [c if isinstance(c, int) else ord(c) for c in str]
        total = len(codes)
        code_type = 0
        i = 0
        while i < total:
            cur = codes[i]
            is_plain = (
                cur in (10, 14, 32)
                or ord('A') <= cur <= ord('Z')
                or ord('a') <= cur <= ord('z')
                or ord('0') <= cur <= ord('9')
            )
            if is_plain:
                code_type |= 0x01
                i += 1
            elif i + 1 < total:
                # 1st unit 129-254, 2nd unit 64-254
                if 129 <= cur <= 254 and 64 <= codes[i + 1] <= 254:
                    code_type |= 0x02
                    i += 2
                else:
                    return code_type
            else:
                # Unclassifiable last unit: report it and give up.
                print(str[i])
                return 0
        return code_type

    @staticmethod
    def FilterSpeChar(str):
        """Return False if *str* contains any listed ASCII special character.

        Returns True when none of them occurs.

        NOTE(review): this polarity is the opposite of ``FilterSpeHz`` and
        of the comment that used to sit above this method; the existing
        return values are kept - confirm against callers before unifying.
        """
        special_chars = "`!@$%^&*()=[]{}\\|:;\"\',/<>?"
        return not any(ch in str for ch in special_chars)

    @staticmethod
    def FilterSpeHz(str):
        """Return True if *str* contains any listed punctuation character.

        Returns False when none of them occurs.

        NOTE(review): on Python 2 the literal below is a byte string, so it
        is iterated byte by byte rather than per character - confirm that
        callers pass matching input.
        """
        special_chars = "~~!@#¥%……&*()【】{}:;“‘《》,。?、"
        return any(ch in str for ch in special_chars)

class CUnicodeChar:
    """Classification and full/half-width conversion of single characters."""

    @staticmethod
    def _code_to_char(code):
        """Return the one-character text string for code point *code*.

        Works on both Python 2 (``unichr``) and Python 3 (``chr``).
        """
        try:
            return unichr(code)  # Python 2
        except NameError:
            return chr(code)  # Python 3 has no unichr

    @staticmethod
    def is_chinese(uchar):
        """Return True if *uchar* lies in U+4E00..U+9FA5."""
        return u'\u4e00' <= uchar <= u'\u9fa5'

    @staticmethod
    def is_number(uchar):
        """Return True if *uchar* is an ASCII digit (U+0030..U+0039)."""
        return u'\u0030' <= uchar <= u'\u0039'

    @staticmethod
    def is_alphabet(uchar):
        """Return True if *uchar* is an ASCII letter (A-Z or a-z)."""
        return (u'\u0041' <= uchar <= u'\u005a'
                or u'\u0061' <= uchar <= u'\u007a')

    @staticmethod
    def is_other(uchar):
        """Return True if *uchar* is not in the Chinese, digit or letter ranges."""
        return not (CUnicodeChar.is_chinese(uchar)
                    or CUnicodeChar.is_number(uchar)
                    or CUnicodeChar.is_alphabet(uchar))

    @staticmethod
    def B2Q(uchar):
        """Convert a half-width character to its full-width form.

        Characters outside 0x20..0x7E are returned unchanged.  The space
        maps to U+3000; every other character is shifted up by 0xFEE0.
        """
        inside_code = ord(uchar)
        if inside_code < 0x0020 or inside_code > 0x7e:
            return uchar
        if inside_code == 0x0020:
            # Space is the one exception to "full = half + 0xFEE0".
            inside_code = 0x3000
        else:
            inside_code += 0xfee0
        return CUnicodeChar._code_to_char(inside_code)

    @staticmethod
    def Q2B(uchar):
        """Convert a full-width character to its half-width form.

        U+3000 maps to the ASCII space; every other character is shifted
        down by 0xFEE0.  If the shifted code is not in 0x20..0x7E the input
        was not a full-width form and is returned unchanged.
        """
        inside_code = ord(uchar)
        if inside_code == 0x3000:
            inside_code = 0x0020
        else:
            inside_code -= 0xfee0
        if inside_code < 0x0020 or inside_code > 0x7e:
            # Also covers negative codes produced by ordinary ASCII/CJK input.
            return uchar
        return CUnicodeChar._code_to_char(inside_code)

class CUnicodeStr:
    """String-level predicates and normalisers built on ``CUnicodeChar``."""

    def test(self):
        """Placeholder; does nothing."""
        pass

    @staticmethod
    def DealSpace(ustr):
        """Trim *ustr* and collapse every whitespace run to one ASCII space.

        ``split()`` without arguments discards leading/trailing whitespace
        and, for unicode text, also treats the ideographic space U+3000 as
        whitespace, so no separate strip/replace step is required.
        """
        return " ".join(ustr.split())

    @staticmethod
    def IsDigitStr(str):
        """Return True if *str* is all digits or ``<digits>.<digits>``."""
        if str.isdigit():
            return True
        parts = str.split(".")
        return len(parts) == 2 and parts[0].isdigit() and parts[1].isdigit()

    @staticmethod
    def IsAlphabetlt3Str(str):
        """Return True if *str* has fewer than 3 chars, all ASCII letters/digits."""
        return len(str) < 3 and all(
            CUnicodeChar.is_alphabet(ch) or CUnicodeChar.is_number(ch)
            for ch in str
        )

    @staticmethod
    def IsFirstIsTip(str):
        """Return True if the first non-whitespace character of *str* is ``.``.

        Empty and whitespace-only input yields False.
        """
        return str.strip().startswith(".")

    @staticmethod
    def IsLastIsTip(str):
        """Return True if the last non-whitespace character of *str* is ``.``.

        Empty and whitespace-only input yields False.
        """
        return str.strip().endswith(".")

    @staticmethod
    def _is_number_then(text, is_tail_char):
        """Shared scanner behind ``IsDigitThenChar`` and ``IsDigitThenHz``.

        Returns True when *text* (at least 2 characters) starts with digits
        and dots that together satisfy ``IsDigitStr``, is followed by at
        least one character accepted by *is_tail_char*, and has no digit or
        dot after the first such character.  Characters that are neither
        digit/dot nor accepted by *is_tail_char* are skipped.
        """
        if len(text) < 2:
            return False
        digitstr = ""
        seen_tail = False
        for ch in text:
            if CUnicodeChar.is_number(ch) or ch == ".":
                if seen_tail:
                    # A digit after the tail part breaks the pattern.
                    return False
                digitstr += ch
                continue
            # Any non-digit character requires a valid number before it;
            # this also rejects input that does not start with a digit.
            if not CUnicodeStr.IsDigitStr(digitstr):
                return False
            if is_tail_char(ch):
                seen_tail = True
        return seen_tail

    @staticmethod
    def IsDigitThenChar(str):
        """Return True if *str* is a number followed by ASCII letters."""
        return CUnicodeStr._is_number_then(str, CUnicodeChar.is_alphabet)

    @staticmethod
    def IsNumAndChar(str):
        """Return True if stripped *str* is non-empty and made only of
        ASCII letters, digits, ``.`` and spaces."""
        text = str.strip()
        if not text:
            return False
        return all(
            CUnicodeChar.is_number(ch)
            or CUnicodeChar.is_alphabet(ch)
            or ch == "."
            or ch == " "
            for ch in text
        )

    @staticmethod
    def IsCharThenDigit(str):
        """Return True if *str* is ASCII letters followed by digits/dots.

        Requires at least 2 characters, a letter before the first digit or
        dot, and no letter after a digit or dot.  Characters that are
        neither letter nor digit/dot are skipped.
        """
        if len(str) < 2:
            return False
        seen_letter = False
        seen_digit = False
        for ch in str:
            if CUnicodeChar.is_alphabet(ch):
                if seen_digit:
                    return False
                seen_letter = True
            elif CUnicodeChar.is_number(ch) or ch == ".":
                if not seen_letter:
                    return False
                seen_digit = True
        return seen_digit

    @staticmethod
    def GetAlphabetFromStr(str):
        """Return the ASCII letters of *str*, in order, as one string."""
        return "".join(ch for ch in str if CUnicodeChar.is_alphabet(ch))

    @staticmethod
    def GetHzFromStr(str):
        """Return the Chinese characters of *str*, in order, as one string."""
        return "".join(ch for ch in str if CUnicodeChar.is_chinese(ch))

    @staticmethod
    def IsDigitThenHz(str):
        """Return True if *str* is a number followed by Chinese characters."""
        return CUnicodeStr._is_number_then(str, CUnicodeChar.is_chinese)

    @staticmethod
    def IsKeyWordStr(str):
        """Return True if *str* is acceptable as a keyword.

        After stripping, the string is rejected when it is empty, is a
        number, starts or ends with ``.``, is fewer than three ASCII
        letters/digits, or contains a character other than Chinese, ASCII
        letters/digits, whitespace, ``.``, ``-``, ``#`` and ``~``.
        """
        text = str.strip()
        if not text:
            return False
        if CUnicodeStr.IsDigitStr(text):
            return False
        if CUnicodeStr.IsFirstIsTip(text) or CUnicodeStr.IsLastIsTip(text):
            return False
        if CUnicodeStr.IsAlphabetlt3Str(text):
            return False
        for ch in text:
            allowed = (
                CUnicodeChar.is_chinese(ch)
                or CUnicodeChar.is_number(ch)
                or CUnicodeChar.is_alphabet(ch)
                or ch == '.'
                or ch.isspace()
                or ch == '-'
                or ch == '#'
                or ch == '~'
            )
            if not allowed:
                return False
        return True

    @staticmethod
    def stringQ2B(ustring):
        """Apply ``CUnicodeChar.Q2B`` to every character and join the results."""
        return "".join([CUnicodeChar.Q2B(uchar) for uchar in ustring])

    @staticmethod
    def uniform(ustring):
        """Return ``stringQ2B(ustring)`` converted to lower case."""
        return CUnicodeStr.stringQ2B(ustring).lower()

    @staticmethod
    def string2List(ustring):
        """Split *ustring* on characters for which ``is_other`` is true.

        Returns the list of maximal runs of Chinese characters, ASCII
        letters and ASCII digits; the separating characters are dropped
        and empty runs are not emitted.
        """
        retList = []
        current = []
        for uchar in ustring:
            if not CUnicodeChar.is_other(uchar):
                current.append(uchar)
            elif current:
                retList.append("".join(current))
                current = []
        if current:
            retList.append("".join(current))
        return retList
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: