数据来源于淘宝搜索，抓取到的数据以JSON格式存储在txt文件中（每行一条JSON记录）。为便于阅读，下面把其中一条记录展开成多行显示：
{
"raw_title": "聚 森马2014冬装新款女短款羽绒服立领修身显瘦轻薄休闲时尚外套",
"view_sales": "272人付款",
"i2iTags": {
"samestyle": {
"url": "/search?type=samestyle&app=i2i&rec_type=&uniqpid=-568824505&nid=40843556571"
},
"dapei": 0,
"all": true,
"similar": {
"url": "/search?type=similar&app=i2i&rec_type=&uniqpid=-568824505&nid=40843556571"
},
"tongdian": true
},
"user_id": "397341302",
"view_price": "179.90",
"title": "聚 <span class=H>森马</span>2014冬装新款女短款羽绒服立领修身显瘦轻薄休闲时尚外套",
"item_loc": "浙江 杭州",
"pid": "-568824505",
"nid": "40843556571",
"view_fee": "6.00",
"nick": "森马官方旗舰店",
"comment_count": "3580",
"reserve_price": "299.00",
"shopcard": {
"delivery": [
462,
-1,
315
],
"encryptedUserId": "UvGkuvGQYvGNy",
"isTmall": 1,
"service": [
468,
-1,
241
],
"description": [
476,
-1,
144
]
},
"detail_url": "http://detail.tmall.com/item.htm?id=40843556571&ad_id=&am_id=&cm_id=140105335569ed55e27b&pm_id=&abbucket=0",
"shopLink": "http://store.taobao.com/shop/view_shop.htm?user_number_id=397341302",
"pic_url": "http://g.search.alicdn.com/img/bao/uploaded/i4/i1/TB1OklpHXXXXXaxXFXXXXXXXXXX_!!0-item_pic.jpg",
"comment_url": "http://detail.tmall.com/item.htm?id=40843556571&ad_id=&am_id=&cm_id=140105335569ed55e27b&pm_id=&abbucket=0&on_comment=1",
"icon": [
{
"outer_text": "0",
"trace": "srpservice",
"dom_class": "icon-service-tianmao",
"show_type": "0",
"url": "http://www.tmall.com/",
"html": "",
"position": "1",
"icon_key": "icon-service-tianmao",
"icon_category": "shop",
"traceIdx": 0
},
{
"outer_text": "0",
"trace": "srpservice",
"dom_class": "icon-service-gongyibaobei",
"show_type": "0",
"url": "http://service.taobao.com/support/knowledge-1117985.htm",
"html": "",
"position": "1",
"icon_key": "icon-service-gongyibaobei",
"icon_category": "baobei",
"traceIdx": 1
}
]
}# -*- coding: utf-8 -*- import json import numpy as np from pylab import * path="semir_2015_1_3.txt" records=[json.loads(line) for line in open(path)] print "the sumof records:",len(records) manPrice=np.array([float(record["view_price"]) for record in records if record["raw_title"].find(u"男")!=-1 and record["raw_title"].find(u"女")==-1]) womanPrice=np.array([float(record["view_price"]) for record in records if record["raw_title"].find(u"男")==-1 and record["raw_title"].find(u"女")!=-1]) manPrice.sort() womanPrice.sort() plot(manPrice,label="man") plot(womanPrice,label="woman") legend(loc='upper left') show()
原文地址:http://blog.csdn.net/ccy0815ccy/article/details/42708733