标签:
运行时报错:UnicodeDecodeError: 'ascii' codec can't decode byte 0xc4 in position 33: ordinal not in range(128)
这是 Python 2.7 的 mimetypes 模块在 Windows 上的一个 bug!
解决方法:
参考官方patch: http://bugs.python.org/file19332/9291a.patch
修改步骤如下:一、在 mimetypes.py 的 import 部分加入 from itertools import count;
二、将 read_windows_registry 中的 def enum_types(mimedb) 修改为:
def enum_types(mimedb):
    """Yield the name of every subkey of the open registry key *mimedb*.

    Names are yielded exactly as ``_winreg.EnumKey`` returns them; no
    encoding step is applied.  ``count`` is ``itertools.count`` and must be
    imported at the top of the module.
    """
    for i in count():
        try:
            yield _winreg.EnumKey(mimedb, i)
        except EnvironmentError:
            # EnumKey raises once the index runs past the last subkey.
            break
具体代码如下(直接复制就好):
"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.

guess_extension(type, strict=1) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles (on Windows, the
  default values are taken from the registry)
read_mime_types(file) -- parse one file, return a dictionary or None
"""
from itertools import count
import os
import sys
import posixpath
import urllib
try:
    import _winreg
except ImportError:
    _winreg = None

__all__ = [
    "guess_type","guess_extension","guess_all_extensions",
    "add_type","read_mime_types","init"
]

knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

inited = False
_db = None


class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.
    """

    def __init__(self, filenames=(), strict=True):
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {}) # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for (ext, type) in types_map.items():
            self.add_type(type, ext, True)
        for (ext, type) in common_types.items():
            self.add_type(type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = urllib.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                type = url[:semi]
            else:
                type = url[:comma]
            if '=' in type or '/' not in type:
                type = 'text/plain'
            return type, None           # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
        types_map = self.types_map[True]
        if ext in types_map:
            return types_map[ext], encoding
        elif ext.lower() in types_map:
            return types_map[ext.lower()], encoding
        elif strict:
            return None, encoding
        types_map = self.types_map[False]
        if ext in types_map:
            return types_map[ext], encoding
        elif ext.lower() in types_map:
            return types_map[ext.lower()], encoding
        else:
            return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().
        An empty list is returned when no extension is known.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Copy the stored list: appending the non-strict extensions to the
        # original would leak them into every later strict lookup, and would
        # hand callers a reference to internal state.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        with open(filename) as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while 1:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Everything from the first word starting with '#' is a comment.
            for i, word in enumerate(words):
                if word.startswith('#'):
                    del words[i:]
                    break
            if not words:
                continue
            type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(type, '.' + suff, strict)

    def read_windows_registry(self, strict=True):
        """
        Load the MIME types database from Windows registry.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """

        # Windows only
        if not _winreg:
            return

        def enum_types(mimedb):
            """Yield every subkey name of the open registry key `mimedb'."""
            # Names are yielded exactly as EnumKey returns them.  The
            # previous implementation called ctype.encode(default_encoding)
            # here and only caught UnicodeEncodeError; that is the call this
            # patch removes to stop the UnicodeDecodeError at start-up.
            for i in count():
                try:
                    yield _winreg.EnumKey(mimedb, i)
                except EnvironmentError:
                    break

        default_encoding = sys.getdefaultencoding()
        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
            for subkeyname in enum_types(hkcr):
                try:
                    with _winreg.OpenKey(hkcr, subkeyname) as subkey:
                        # Only check file extensions
                        if not subkeyname.startswith("."):
                            continue
                        # raises EnvironmentError if no 'Content Type' value
                        mimetype, datatype = _winreg.QueryValueEx(
                            subkey, 'Content Type')
                        if datatype != _winreg.REG_SZ:
                            continue
                        try:
                            mimetype = mimetype.encode(default_encoding)
                            subkeyname = subkeyname.encode(default_encoding)
                        except UnicodeError:
                            # UnicodeError covers both UnicodeEncodeError
                            # and UnicodeDecodeError, so an entry that cannot
                            # be represented in the default encoding is
                            # skipped instead of aborting initialisation.
                            continue
                        self.add_type(mimetype, subkeyname, strict)
                except EnvironmentError:
                    continue

def guess_type(url, strict=True):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    if _db is None:
        init()
    return _db.guess_type(url, strict)


def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  If no extension can be guessed for `type', an empty
    list is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    if _db is None:
        init()
    return _db.guess_all_extensions(type, strict)

def guess_extension(type, strict=True):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    if _db is None:
        init()
    return _db.guess_extension(type, strict)

def add_type(type, ext, strict=True):
    """Add a mapping between a type and an extension.

    When the extension is already known, the new
    type will replace the old one. When the type
    is already known the extension will be added
    to the list of known extensions.

    If strict is true, information will be added to
    list of standard types, else to the list of non-standard
    types.
    """
    if _db is None:
        init()
    return _db.add_type(type, ext, strict)


def init(files=None):
    """(Re)build the module-level database and lookup tables.

    When `files' is None, the Windows registry is read first (if _winreg
    is available) and `knownfiles' is used as the file list.  Entries of
    the file list that are not existing files are skipped.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    inited = True    # so that MimeTypes.__init__() doesn't call us again
    db = MimeTypes()
    if files is None:
        if _winreg:
            db.read_windows_registry()
        files = knownfiles
    for file in files:
        if os.path.isfile(file):
            db.read(file)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    common_types = db.types_map[False]
    # Make the DB a global variable now that it is fully initialized
    _db = db


def read_mime_types(file):
    """Parse one mime.types-format file and return its strict type map.

    Returns a dictionary mapping extensions to types (the default strict
    types plus those read from `file'), or None if `file' cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    # Close the file as soon as it has been parsed.
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]


def _default_mime_types():
    """Install the built-in default tables as module-level globals."""
    global suffix_map
    global encodings_map
    global types_map
    global common_types

    suffix_map = {
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        '.txz': '.tar.xz',
        }

    encodings_map = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        '.xz': 'xz',
        }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.isi.edu/in-notes/iana/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted!
    types_map = {
        '.a'      : 'application/octet-stream',
        '.ai'     : 'application/postscript',
        '.aif'    : 'audio/x-aiff',
        '.aifc'   : 'audio/x-aiff',
        '.aiff'   : 'audio/x-aiff',
        '.au'     : 'audio/basic',
        '.avi'    : 'video/x-msvideo',
        '.bat'    : 'text/plain',
        '.bcpio'  : 'application/x-bcpio',
        '.bin'    : 'application/octet-stream',
        '.bmp'    : 'image/x-ms-bmp',
        '.c'      : 'text/plain',
        # Duplicates :(
        '.cdf'    : 'application/x-cdf',
        '.cdf'    : 'application/x-netcdf',
        '.cpio'   : 'application/x-cpio',
        '.csh'    : 'application/x-csh',
        '.css'    : 'text/css',
        '.dll'    : 'application/octet-stream',
        '.doc'    : 'application/msword',
        '.dot'    : 'application/msword',
        '.dvi'    : 'application/x-dvi',
        '.eml'    : 'message/rfc822',
        '.eps'    : 'application/postscript',
        '.etx'    : 'text/x-setext',
        '.exe'    : 'application/octet-stream',
        '.gif'    : 'image/gif',
        '.gtar'   : 'application/x-gtar',
        '.h'      : 'text/plain',
        '.hdf'    : 'application/x-hdf',
        '.htm'    : 'text/html',
        '.html'   : 'text/html',
        '.ico'    : 'image/vnd.microsoft.icon',
        '.ief'    : 'image/ief',
        '.jpe'    : 'image/jpeg',
        '.jpeg'   : 'image/jpeg',
        '.jpg'    : 'image/jpeg',
        '.js'     : 'application/javascript',
        '.ksh'    : 'text/plain',
        '.latex'  : 'application/x-latex',
        '.m1v'    : 'video/mpeg',
        '.man'    : 'application/x-troff-man',
        '.me'     : 'application/x-troff-me',
        '.mht'    : 'message/rfc822',
        '.mhtml'  : 'message/rfc822',
        '.mif'    : 'application/x-mif',
        '.mov'    : 'video/quicktime',
        '.movie'  : 'video/x-sgi-movie',
        '.mp2'    : 'audio/mpeg',
        '.mp3'    : 'audio/mpeg',
        '.mp4'    : 'video/mp4',
        '.mpa'    : 'video/mpeg',
        '.mpe'    : 'video/mpeg',
        '.mpeg'   : 'video/mpeg',
        '.mpg'    : 'video/mpeg',
        '.ms'     : 'application/x-troff-ms',
        '.nc'     : 'application/x-netcdf',
        '.nws'    : 'message/rfc822',
        '.o'      : 'application/octet-stream',
        '.obj'    : 'application/octet-stream',
        '.oda'    : 'application/oda',
        '.p12'    : 'application/x-pkcs12',
        '.p7c'    : 'application/pkcs7-mime',
        '.pbm'    : 'image/x-portable-bitmap',
        '.pdf'    : 'application/pdf',
        '.pfx'    : 'application/x-pkcs12',
        '.pgm'    : 'image/x-portable-graymap',
        '.pl'     : 'text/plain',
        '.png'    : 'image/png',
        '.pnm'    : 'image/x-portable-anymap',
        '.pot'    : 'application/vnd.ms-powerpoint',
        '.ppa'    : 'application/vnd.ms-powerpoint',
        '.ppm'    : 'image/x-portable-pixmap',
        '.pps'    : 'application/vnd.ms-powerpoint',
        '.ppt'    : 'application/vnd.ms-powerpoint',
        '.ps'     : 'application/postscript',
        '.pwz'    : 'application/vnd.ms-powerpoint',
        '.py'     : 'text/x-python',
        '.pyc'    : 'application/x-python-code',
        '.pyo'    : 'application/x-python-code',
        '.qt'     : 'video/quicktime',
        '.ra'     : 'audio/x-pn-realaudio',
        '.ram'    : 'application/x-pn-realaudio',
        '.ras'    : 'image/x-cmu-raster',
        '.rdf'    : 'application/xml',
        '.rgb'    : 'image/x-rgb',
        '.roff'   : 'application/x-troff',
        '.rtx'    : 'text/richtext',
        '.sgm'    : 'text/x-sgml',
        '.sgml'   : 'text/x-sgml',
        '.sh'     : 'application/x-sh',
        '.shar'   : 'application/x-shar',
        '.snd'    : 'audio/basic',
        '.so'     : 'application/octet-stream',
        '.src'    : 'application/x-wais-source',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc' : 'application/x-sv4crc',
        '.swf'    : 'application/x-shockwave-flash',
        '.t'      : 'application/x-troff',
        '.tar'    : 'application/x-tar',
        '.tcl'    : 'application/x-tcl',
        '.tex'    : 'application/x-tex',
        '.texi'   : 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.tif'    : 'image/tiff',
        '.tiff'   : 'image/tiff',
        '.tr'     : 'application/x-troff',
        '.tsv'    : 'text/tab-separated-values',
        '.txt'    : 'text/plain',
        '.ustar'  : 'application/x-ustar',
        '.vcf'    : 'text/x-vcard',
        '.wav'    : 'audio/x-wav',
        '.wiz'    : 'application/msword',
        '.wsdl'   : 'application/xml',
        '.xbm'    : 'image/x-xbitmap',
        '.xlb'    : 'application/vnd.ms-excel',
        # Duplicates :(
        '.xls'    : 'application/excel',
        '.xls'    : 'application/vnd.ms-excel',
        '.xml'    : 'text/xml',
        '.xpdl'   : 'application/xml',
        '.xpm'    : 'image/x-xpixmap',
        '.xsl'    : 'application/xml',
        '.xwd'    : 'image/x-xwindowdump',
        '.zip'    : 'application/zip',
        }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=0 flag is given to the API methods.

    # Please sort these too
    common_types = {
        '.jpg' : 'image/jpg',
        '.mid' : 'audio/midi',
        '.midi': 'audio/midi',
        '.pct' : 'image/pict',
        '.pic' : 'image/pict',
        '.pict': 'image/pict',
        '.rtf' : 'application/rtf',
        '.xul' : 'text/xul'
        }


_default_mime_types()


if __name__ == '__main__':
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        """Print the usage text (and `msg', if any), then exit with `code'."""
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1
    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if not guess:
                print("I don't know anything about type %s" % gtype)
            else:
                print(guess)
        else:
            guess, encoding = guess_type(gtype, strict)
            if not guess:
                print("I don't know anything about type %s" % gtype)
            else:
                print('type: %s encoding: %s' % (guess, encoding))
标签:
原文地址:http://www.cnblogs.com/virqin/p/4469956.html