您的位置:首页 > 其它

txt文件编码判断

2015-09-17 14:49 281 查看
txt文件首先要区分有无BOM(Byte Order Mark)。有BOM的话相对简单;没有BOM的话,还需要进一步区分UTF-8和ANSI(例如GBK)。代码如下:

/**
 * Checks whether the whole byte array is well-formed UTF-8.
 *
 * <p>Sequences of one to four bytes are accepted. Overlong encodings
 * (lead bytes C0/C1, E0 followed by 80..9F, F0 followed by 80..8F),
 * UTF-16 surrogate code points (ED followed by A0..BF), code points above
 * U+10FFFF (F4 followed by 90..BF, or lead bytes F5..FF) and truncated
 * sequences are all rejected. Being strict here matters because the result
 * is used to tell BOM-less UTF-8 apart from legacy multi-byte encodings.
 *
 * @param str the raw bytes to validate; must not be {@code null}
 * @return {@code true} if every byte belongs to a well-formed UTF-8
 *         sequence (an empty array is considered valid), {@code false}
 *         otherwise
 */
private static boolean isutf8(byte[] str)
{
    final int size = str.length;
    int i = 0;

    while (i < size)
    {
        final int lead = str[i] & 0xFF;
        final int step;

        // Inclusive range allowed for the byte right after the lead byte.
        // It is narrower than 80..BF for a few lead bytes, see below.
        int minSecond = 0x80;
        int maxSecond = 0xBF;

        if (lead <= 0x7F)
        {
            step = 1;
        }
        else if (lead >= 0xC2 && lead <= 0xDF)
        {
            // C0 and C1 could only start overlong encodings of ASCII.
            step = 2;
        }
        else if (lead >= 0xE0 && lead <= 0xEF)
        {
            step = 3;
            if (lead == 0xE0)
            {
                minSecond = 0xA0; // below this the encoding is overlong
            }
            else if (lead == 0xED)
            {
                maxSecond = 0x9F; // above this lies the surrogate range
            }
        }
        else if (lead >= 0xF0 && lead <= 0xF4)
        {
            step = 4;
            if (lead == 0xF0)
            {
                minSecond = 0x90; // below this the encoding is overlong
            }
            else if (lead == 0xF4)
            {
                maxSecond = 0x8F; // above this exceeds U+10FFFF
            }
        }
        else
        {
            // Stray continuation byte (80..BF) or a lead byte that can
            // never appear in UTF-8 (C0, C1, F5..FF).
            return false;
        }

        // The sequence must fit entirely inside the array.
        if (step > size - i)
        {
            return false;
        }

        if (step > 1)
        {
            final int second = str[i + 1] & 0xFF;
            if (second < minSecond || second > maxSecond)
            {
                return false;
            }
            // Remaining bytes only need the generic 10xxxxxx shape.
            for (int k = 2; k < step; k++)
            {
                if ((str[i + k] & 0xC0) != 0x80)
                {
                    return false;
                }
            }
        }

        i += step;
    }

    return true;
}

/**
 * Guesses the character encoding of the raw bytes of a text file.
 *
 * <p>The first two bytes are compared against known byte-order-mark
 * prefixes:
 * <ul>
 *   <li>{@code EF BB} (start of the UTF-8 BOM {@code EF BB BF}) gives {@code "UTF-8"}</li>
 *   <li>{@code FF FE} gives {@code "Unicode"}</li>
 *   <li>{@code FE FF} gives {@code "UTF-16BE"}</li>
 * </ul>
 * When no prefix matches, or fewer than two bytes are available, the
 * content itself is validated with {@code isutf8}: {@code "UTF-8"} if it
 * passes, otherwise {@code "GBK"} is assumed.
 *
 * @param bytes the file content; must not be {@code null}
 * @return one of {@code "UTF-8"}, {@code "Unicode"}, {@code "UTF-16BE"} or
 *         {@code "GBK"}; never {@code null}
 */
private static String getCode(byte[] bytes){
    // Only look for a BOM when two bytes really exist. Reading past the end
    // of a stream yields -1, which previously got mixed into the prefix and
    // made the single byte 0xFF look like the FE FF mark.
    if (bytes.length >= 2) {
        int p = ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF);
        switch (p) {
            case 0xefbb:
                return "UTF-8";
            case 0xfffe:
                return "Unicode";
            case 0xfeff:
                return "UTF-16BE";
            default:
                break;
        }
    }

    // No BOM: decide from the content itself.
    return isutf8(bytes) ? "UTF-8" : "GBK";
}
/**
 * Guesses the character encoding of the raw bytes of a text file.
 *
 * <p>If at least two bytes are present, the first two are matched against
 * byte-order-mark prefixes: {@code EF BB} gives {@code "UTF-8"},
 * {@code FF FE} gives {@code "Unicode"} and {@code FE FF} gives
 * {@code "UTF-16BE"}. In every other case the content is checked with
 * {@code isutf8}, yielding {@code "UTF-8"} on success and {@code "GBK"}
 * otherwise.
 *
 * @param bytes the file content; must not be {@code null}
 * @return the detected encoding name
 */
private static String getCode(byte[] bytes) {
    // Pack the two leading bytes into one int; -1 means "too short for a BOM"
    // and can never equal any of the 16-bit prefixes below.
    int prefix = -1;
    if (bytes.length > 1) {
        prefix = ((bytes[0] & 0xFF) << 8) | (bytes[1] & 0xFF);
    }

    if (prefix == 0xEFBB) {
        return "UTF-8";
    }
    if (prefix == 0xFFFE) {
        return "Unicode";
    }
    if (prefix == 0xFEFF) {
        return "UTF-16BE";
    }

    // No recognised BOM prefix: fall back to content inspection.
    return isutf8(bytes) ? "UTF-8" : "GBK";
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  编码 utf8 无bom