标签:ret tar 编码方式 出错 put 代码 osi type python
In [1]: name = '张三' In [2]: print name 张三 In [3]: name Out[3]: '\xe5\xbc\xa0\xe4\xb8\x89' #utf8编码,存储形式 In [4]: len(name) Out[4]: 6 In [5]: name[0:2] #分片操作 Out[5]: '\xe5\xbc' In [6]: print name[0:1] ? In [7]: type(name) #类型是字符串类型 Out[7]: str
In [8]: name = u‘张三‘ In [9]: name Out[9]: u‘\u5f20\u4e09‘ #Unicode编码 表现形式 In [10]: print name 张三 In [11]: print name[0:1] 张 In [12]: name[0:1] Out[12]: u‘\u5f20‘ In [13]: len(name) Out[13]: 2 In [15]: type(name) Out[15]: unicode #类型是一个unicode
In [37]: name = u‘张三‘ In [38]: b_name = name.encode(‘utf-8‘) #编码为不同的存储形式,既可以编码为utf-8 In [39]: b_name Out[39]: ‘\xe5\xbc\xa0\xe4\xb8\x89‘ In [47]: type(b_name) #类型为str Out[47]: str In [40]: b_name2 = name.encode(‘utf-16‘) #也可以编码为utf-16 In [41]: b_name2 Out[41]: ‘\xff\xfe _\tN‘ In [42]: b_name3 = name.encode(‘utf-32‘) #还可以编码为utf-32 In [43]: b_name3 Out[43]: ‘\xff\xfe\x00\x00 _\x00\x00\tN\x00\x00‘ In [44]: j_name = b_name.decode(‘utf-8‘) #把utf-8解码为Unicode In [45]: j_name Out[45]: u‘\u5f20\u4e09‘ In [46]: type(j_name) #类型为Unicode Out[46]: unicode
In [47]: name = u‘张三‘ In [50]: with open(‘/tmp/test‘, ‘w‘) as f: ...: f.write(name) ...: --------------------------------------------------------------------------- UnicodeEncodeError Traceback (most recent call last) <ipython-input-4-0d87fa01de83> in <module>() 1 with open(‘/tmp/test‘, ‘w‘) as f: ----> 2 f.write(name) UnicodeEncodeError: ‘ascii‘ codec can‘t encode characters in position 0-1: ordinal not in range(128)
In [51]: with open(‘/tmp/test‘, ‘w‘) as f: ...: f.write(name.encode(‘utf-8‘)) #编码为utf-8形式写入到文件里面 ...: In [52]: with open(‘/tmp/test‘, ‘r‘) as f: ...: new_name=f.read() ...: In [53]: new_name.decode(‘utf-8‘) #把utf-8解码为Unicode Out[53]: u‘\u5f20\u4e09‘
In [15]: name = u‘张三‘ In [16]: type(name) Out[16]: unicode
In [55]: import codecs In [56]: name = u'张三' In [57]: with codecs.open('/tmp/test', 'w', encoding='utf-8') as f: ...: f.write(name) ...: In [58]: with codecs.open('/tmp/test', 'r', encoding='utf-8') as f: ...: new_name=f.read() ...: In [59]: new_name Out[59]: u'\u5f20\u4e09'
>>> name = ‘张三‘ >>> name ‘张三‘ >>> with open(‘/tmp/test‘, ‘w‘, encoding=‘utf-8‘) as f: ... f.write(name) ...
# In Python 3, we need a helper that accepts str or bytes and always
# returns str:
def to_str(bytes_or_str):
    """Return the argument as ``str``.

    Args:
        bytes_or_str: A ``bytes`` or ``str`` instance.

    Returns:
        The UTF-8 decoded text when given ``bytes``; otherwise the argument
        itself, unchanged.

    Raises:
        UnicodeDecodeError: If a ``bytes`` argument is not valid UTF-8.
    """
    if isinstance(bytes_or_str, bytes):
        value = bytes_or_str.decode('utf-8')
    else:
        value = bytes_or_str
    return value  # Instance of str


# We also need a helper that accepts str or bytes and always returns bytes:
def to_bytes(bytes_or_str):
    """Return the argument as ``bytes``.

    Args:
        bytes_or_str: A ``bytes`` or ``str`` instance.

    Returns:
        The UTF-8 encoded data when given ``str``; otherwise the argument
        itself, unchanged.
    """
    if isinstance(bytes_or_str, str):
        value = bytes_or_str.encode('utf-8')
    else:
        value = bytes_or_str
    return value  # Instance of bytes
# In Python 2, we need a helper that accepts str or unicode and always
# returns unicode:
# Python 2
def to_unicode(unicode_or_str):
    """Return the argument as ``unicode`` (Python 2 only).

    Args:
        unicode_or_str: A Python 2 ``str`` (byte string) or ``unicode``.

    Returns:
        The UTF-8 decoded text when given ``str``; otherwise the argument
        itself, unchanged.
    """
    if isinstance(unicode_or_str, str):
        value = unicode_or_str.decode('utf-8')
    else:
        value = unicode_or_str
    return value  # Instance of unicode


# We also need a helper that accepts str or unicode and always returns str:
# Python 2
def to_str(unicode_or_str):
    """Return the argument as a Python 2 ``str`` (byte string).

    Relies on the ``unicode`` builtin, which exists only in Python 2.

    Args:
        unicode_or_str: A Python 2 ``str`` (byte string) or ``unicode``.

    Returns:
        The UTF-8 encoded bytes when given ``unicode``; otherwise the
        argument itself, unchanged.
    """
    if isinstance(unicode_or_str, unicode):
        value = unicode_or_str.encode('utf-8')
    else:
        value = unicode_or_str
    return value  # Instance of str
标签:ret tar 编码方式 出错 put 代码 osi type python
原文地址:http://www.cnblogs.com/moon1223/p/6363690.html