数据来源于淘宝搜索,抓取到的数据以 JSON 格式存储在 txt 文件中,每行一条记录。单条记录示例如下:
{ "raw_title": "聚 森马2014冬装新款女短款羽绒服立领修身显瘦轻薄休闲时尚外套", "view_sales": "272人付款", "i2iTags": { "samestyle": { "url": "/search?type=samestyle&app=i2i&rec_type=&uniqpid=-568824505&nid=40843556571" }, "dapei": 0, "all": true, "similar": { "url": "/search?type=similar&app=i2i&rec_type=&uniqpid=-568824505&nid=40843556571" }, "tongdian": true }, "user_id": "397341302", "view_price": "179.90", "title": "聚 <span class=H>森马</span>2014冬装新款女短款羽绒服立领修身显瘦轻薄休闲时尚外套", "item_loc": "浙江 杭州", "pid": "-568824505", "nid": "40843556571", "view_fee": "6.00", "nick": "森马官方旗舰店", "comment_count": "3580", "reserve_price": "299.00", "shopcard": { "delivery": [ 462, -1, 315 ], "encryptedUserId": "UvGkuvGQYvGNy", "isTmall": 1, "service": [ 468, -1, 241 ], "description": [ 476, -1, 144 ] }, "detail_url": "http://detail.tmall.com/item.htm?id=40843556571&ad_id=&am_id=&cm_id=140105335569ed55e27b&pm_id=&abbucket=0", "shopLink": "http://store.taobao.com/shop/view_shop.htm?user_number_id=397341302", "pic_url": "http://g.search.alicdn.com/img/bao/uploaded/i4/i1/TB1OklpHXXXXXaxXFXXXXXXXXXX_!!0-item_pic.jpg", "comment_url": "http://detail.tmall.com/item.htm?id=40843556571&ad_id=&am_id=&cm_id=140105335569ed55e27b&pm_id=&abbucket=0&on_comment=1", "icon": [ { "outer_text": "0", "trace": "srpservice", "dom_class": "icon-service-tianmao", "show_type": "0", "url": "http://www.tmall.com/", "html": "", "position": "1", "icon_key": "icon-service-tianmao", "icon_category": "shop", "traceIdx": 0 }, { "outer_text": "0", "trace": "srpservice", "dom_class": "icon-service-gongyibaobei", "show_type": "0", "url": "http://service.taobao.com/support/knowledge-1117985.htm", "html": "", "position": "1", "icon_key": "icon-service-gongyibaobei", "icon_category": "baobei", "traceIdx": 1 } ] }
# -*- coding: utf-8 -*-
"""Plot sorted men's vs. women's item prices from a file of JSON records.

Each non-blank line of the input file is parsed as one JSON object. Only the
"raw_title" and "view_price" fields of every record are read.
"""
import io
import json

import numpy as np

# Data file read by main(): one JSON object per line.
path = "semir_2015_1_3.txt"


def load_records(filename):
    """Return the list of JSON objects stored one per line in ``filename``.

    The file is decoded as UTF-8 (the encoding ``json.loads`` assumed for byte
    strings under Python 2) and is closed as soon as parsing finishes.
    Blank or whitespace-only lines are skipped instead of crashing the parser.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if a non-blank line is not valid JSON.
    """
    with io.open(filename, encoding="utf-8") as handle:
        return [json.loads(line) for line in handle if line.strip()]


def split_prices_by_gender(records):
    """Split record prices into men's and women's groups, each sorted ascending.

    A record counts as "man" when its "raw_title" contains u"男" but not u"女",
    and as "woman" when it contains u"女" but not u"男". Titles containing both
    characters or neither are ignored.

    Returns:
        (manPrice, womanPrice): two float numpy arrays sorted in ascending order.

    Raises:
        KeyError: if a record lacks "raw_title", or a selected record lacks
            "view_price".
        ValueError: if a selected record's "view_price" is not a number.
    """
    man_prices = []
    woman_prices = []
    for record in records:
        title = record["raw_title"]
        mentions_man = u"男" in title
        mentions_woman = u"女" in title
        if mentions_man == mentions_woman:
            # Both or neither marker present: ambiguous, so not counted.
            continue
        price = float(record["view_price"])
        if mentions_man:
            man_prices.append(price)
        else:
            woman_prices.append(price)
    return (np.sort(np.array(man_prices, dtype=float)),
            np.sort(np.array(woman_prices, dtype=float)))


def plot_sorted_prices(manPrice, womanPrice):
    """Draw both sorted price arrays as labelled curves and show the figure."""
    # Imported here so the data helpers above stay usable without a plotting
    # backend, and to avoid the namespace pollution of ``from pylab import *``.
    import pylab

    pylab.plot(manPrice, label="man")
    pylab.plot(womanPrice, label="woman")
    pylab.legend(loc='upper left')
    pylab.show()


def main():
    """Load the records, report how many there are, and plot the price curves."""
    records = load_records(path)
    # Single-argument form prints the same text under Python 2 and Python 3.
    print("the sumof records: %d" % len(records))
    manPrice, womanPrice = split_prices_by_gender(records)
    plot_sorted_prices(manPrice, womanPrice)


if __name__ == "__main__":
    main()
原文地址:http://blog.csdn.net/ccy0815ccy/article/details/42708733