码迷,mamicode.com
首页 > Windows程序 > 详细

C#: 判断txt文件的编码格式

时间:2016-05-10 15:18:53      阅读:1054      评论:0      收藏:0      [点我收藏+]

标签:

/// <summary>
/// Guesses the text encoding of a file by inspecting its first bytes.
/// A byte-order mark (BOM) is checked first; if none is present, the first
/// ten bytes are tested for UTF-8 well-formedness via <c>isUtf8</c>.
/// </summary>
/// <param name="filePath">Path of the file to inspect.</param>
/// <returns>
/// <see cref="Encoding.UTF8"/>, <see cref="Encoding.BigEndianUnicode"/> or
/// <see cref="Encoding.Unicode"/> when the matching BOM is found;
/// otherwise <see cref="Encoding.UTF8"/> when the sampled bytes pass the UTF-8 check,
/// or <see cref="Encoding.Default"/> when they do not.
/// Returns <c>null</c> when the file has no BOM and holds fewer than four bytes,
/// i.e. too little data to classify.
/// </returns>
public static Encoding GetTextEncoding(string filePath)
        {
            byte[] buff = new byte[10];
            int result = 0;

            // Open read-only (the two-argument constructor asks for read/write access,
            // which fails on read-only files) and dispose the handle on every path.
            using (FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read))
            {
                // Stream.Read may return fewer bytes than requested, so keep reading
                // until the sample buffer is full or the end of the file is reached.
                int read;
                while (result < buff.Length
                    && (read = fs.Read(buff, result, buff.Length - result)) > 0)
                {
                    result += read;
                }
            }

            // BOM checks only need as many bytes as the BOM itself is long.
            if (result >= 3 && buff[0] == 0xEF && buff[1] == 0xBB && buff[2] == 0xBF)
            {// utf-8 with BOM
                return Encoding.UTF8;
            }
            if (result >= 2 && buff[0] == 0xFE && buff[1] == 0xFF)
            {// big endian unicode
                return Encoding.BigEndianUnicode;
            }
            if (result >= 2 && buff[0] == 0xFF && buff[1] == 0xFE)
            {// little endian unicode
                return Encoding.Unicode;
            }

            // Without a BOM, files shorter than four bytes are left unclassified
            // (callers already handle the null result for this case).
            if (result <= 3)
            {
                return null;
            }

            // The whole buffer is passed on purpose: when the file is shorter than the
            // buffer the tail is zero-filled, and zero bytes are plain ASCII.
            if (isUtf8(buff))
            {// utf-8 without BOM
                return Encoding.UTF8;
            }

            // ansi / platform default encoding
            return Encoding.Default;
        }

        // 110XXXXX, 10XXXXXX  
        // 1110XXXX, 10XXXXXX, 10XXXXXX  
        // 11110XXX, 10XXXXXX, 10XXXXXX, 10XXXXXX  
        /// <summary>
        /// Checks whether a byte buffer is consistent with UTF-8 encoding.
        /// Every byte must be ASCII, or a valid lead byte followed by the required
        /// number of continuation bytes of the form 10xxxxxx.
        /// </summary>
        /// <param name="buff">
        /// Bytes to examine. The buffer is treated as a prefix of a longer stream:
        /// a multi-byte sequence cut off by the end of the buffer is accepted as long
        /// as the continuation bytes that are present are valid.
        /// </param>
        /// <returns>
        /// <c>true</c> when no invalid sequence is found (this includes pure ASCII and an
        /// empty buffer); <c>false</c> as soon as an invalid lead or continuation byte is seen.
        /// </returns>
        private static bool isUtf8(byte[] buff)
        {
            int i = 0;
            while (i < buff.Length)
            {
                byte lead = buff[i];
                int trailing;   // number of continuation bytes required after the lead byte

                if (lead < 0x80)                            // 0xxx xxxx (ASCII)
                {
                    trailing = 0;
                }
                else if (lead >= 0xC2 && lead <= 0xDF)      // 110x xxxx 10xx xxxx
                {
                    // 0xC0 and 0xC1 would only encode overlong forms, so they are rejected below.
                    trailing = 1;
                }
                else if ((lead & 0xF0) == 0xE0)             // 1110 xxxx 10xx xxxx 10xx xxxx
                {
                    trailing = 2;
                }
                else if (lead >= 0xF0 && lead <= 0xF4)      // 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
                {
                    trailing = 3;
                }
                else
                {
                    // Stray continuation byte (10xx xxxx) or a byte that can never start
                    // a UTF-8 sequence: typical for ANSI/GBK text.
                    return false;
                }

                for (int k = 1; k <= trailing; k++)
                {
                    if (i + k >= buff.Length)
                    {
                        // Sequence is cut off by the end of the sample; nothing more to verify.
                        break;
                    }
                    // A continuation byte must match 10xx xxxx exactly; testing only the
                    // high bit would also accept lead bytes such as 0xD0 or 0xFF.
                    if ((buff[i + k] & 0xC0) != 0x80)
                    {
                        return false;
                    }
                }

                // Skip the lead byte together with its continuation bytes.
                i += trailing + 1;
            }
            return true;
        }

参考:

http://blog.csdn.net/xt_chaoji/article/details/7345052

http://blog.csdn.net/nocml/article/details/8106068

C#: 判断txt文件的编码格式

标签:

原文地址:http://www.cnblogs.com/jane850113/p/5477879.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!