标签:
# -*- coding:utf-8 -*-
"""Compare the encoded size of one sample document across several formats.

The original experiment showed that BSON needs about as many bytes as JSON,
sometimes even more, and that how well a compression algorithm performs
depends on how redundant its input is.

The script prints, for each format, the encoded size in bytes and (except for
JSON itself) the ratio of that size to the size of the JSON encoding.
"""
from __future__ import division, print_function

import bz2
import collections  # From Python standard library.
import datetime
import json
import sys

try:
    # Third-party and optional: the standard-library comparisons below should
    # still run when msgpack is not installed.
    import msgpack
except ImportError:
    msgpack = None

# Textual form used to carry datetime objects through msgpack.
_DATETIME_FORMAT = "%Y%m%dT%H:%M:%S.%f"

# Sample document that every encoder below is measured against.
test = {
    "name": "lemo",
    "age": 12,
    "address": {
        "city": "suzhou",
        "country": "china",
        "code": 215000,
    },
    # "timestamp": datetime.datetime.now(),
    "scores": [
        {"name": "english", "grade": 3.0, "code": 215000},
        {"name": "chinese", "grade": 2.0, "code": 215000},
        {"name": "kerea", "grade": 3.0, "code": 232000},
        {"name": "france", "grade": 4.0, "code": 235000},
        {"name": "japanese", "grade": 4.0, "code": 235000},
        {"name": "kerea", "grade": 4.0, "code": 235000},
        {"name": "kerea", "grade": 4.0, "code": 235300},
    ],
}


def decode_datetime(obj):
    """msgpack ``object_hook``: rebuild a datetime written by encode_datetime.

    Args:
        obj: A mapping decoded by msgpack.

    Returns:
        A ``datetime.datetime`` when ``obj`` carries the ``__datetime__``
        marker, otherwise ``obj`` unchanged.
    """
    # Depending on the unpacker's ``raw`` setting the keys and values arrive
    # either as str or as bytes, so both spellings are accepted. The original
    # tested the bytes marker but then read the str "as_str" key.
    if "__datetime__" in obj or b"__datetime__" in obj:
        as_str = obj["as_str"] if "as_str" in obj else obj[b"as_str"]
        if isinstance(as_str, bytes):
            as_str = as_str.decode("utf-8")
        obj = datetime.datetime.strptime(as_str, _DATETIME_FORMAT)
    return obj


def encode_datetime(obj):
    """msgpack ``default`` hook: turn a datetime into a tagged plain dict.

    Args:
        obj: An object msgpack could not serialize natively.

    Returns:
        ``{'__datetime__': True, 'as_str': ...}`` for ``datetime.datetime``
        instances, otherwise ``obj`` unchanged.
    """
    if isinstance(obj, datetime.datetime):
        return {"__datetime__": True, "as_str": obj.strftime(_DATETIME_FORMAT)}
    return obj


# BSON comparison, kept disabled as in the original (requires pymongo's bson):
# import bson
# from bson.codec_options import CodecOptions
# data = bson.BSON.encode(test)
# print("bson:", len(data))
# decoded_doc = bson.BSON.decode(data)

### json
json_str = json.dumps(test)
# Encode once; every compressor works on bytes and every ratio is
# bytes-to-bytes.
json_bytes = json_str.encode("utf-8")
print("json:", len(json_str))

### bz2
compressor = bz2.BZ2Compressor()
# compress() may already return part of the stream, so its result has to be
# kept and joined with what flush() returns.
bz2_str = compressor.compress(json_bytes) + compressor.flush()
print("bz2:", len(bz2_str), len(bz2_str) / len(json_bytes))

### lzma (standard library from Python 3.3 on)
if sys.version_info >= (3, 3):
    import lzma

    lzc = lzma.LZMACompressor()
    out1 = lzc.compress(json_bytes)
    out2 = lzc.flush()
    # Concatenate all the partial results:
    result = b"".join([out1, out2])
    print("lzma:", len(result), len(result) / len(json_bytes))

    # Round-trip sanity check of the compressed stream.
    lzd = lzma.LZMADecompressor()
    dec_str = lzd.decompress(result)
    assert json_bytes == dec_str

### msgpack
# useful_dict = {
#     "id": 1,
#     "created": datetime.datetime.now(),
# }
if msgpack is not None:
    packed_dict = msgpack.packb(test, default=encode_datetime)
    print("msgpack:", len(packed_dict), len(packed_dict) / len(json_bytes))

    # Compressing the msgpack output, kept disabled as in the original:
    # compressor = bz2.BZ2Compressor(compresslevel=1)
    # compressor.compress(packed_dict)
    # bz2_str = compressor.flush()
    # print("msgpack_bz2:", len(bz2_str), len(bz2_str) / len(json_str))

    this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
else:
    print("msgpack: skipped (msgpack is not installed)")
Python下各种格式的编码效率初步测试(JSON, BSON, bz2, lzma, msgpack)
标签:
原文地址:http://my.oschina.net/cppblog/blog/408365